diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..bd440b5ecd6c9d951fbb73b4cc25798426a98b36 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,44 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-10/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-100/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-110/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-120/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-130/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-140/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-150/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-160/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-170/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-180/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-190/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-20/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-200/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-210/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-220/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-230/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-240/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-250/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-260/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-270/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-280/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-290/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-30/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-300/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-310/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-320/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-330/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-340/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-350/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-360/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-370/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-380/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-390/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-397/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-40/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-50/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-60/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-70/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-80/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-90/tokenizer.json filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/adapter_config.json b/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a0d31afa2216e2d6d2237fc15eef0c8c03ca674 --- /dev/null +++ b/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.2-1B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/adapter_model.safetensors b/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2cb5e63be461f902888c6376e683ec07e8b60ccd --- /dev/null +++ b/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:775110dea61026c476be64640336bc82938ab26937f74e0e54a10b81be094570 +size 41248 diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..1bad6a0f648dccdbec523ca79ba90fbcfc806af0 --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,93 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- if strftime_now is defined %} + {%- set date_string = strftime_now("%d %b %Y") %} + {%- else %} + {%- set date_string = "26 Jul 2024" %} + {%- endif %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {{- "<|eot_id|>" }} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-10/README.md b/checkpoints/checkpoint-10/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b974a935220f493e52a3da346206a652479b4735 --- /dev/null +++ b/checkpoints/checkpoint-10/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.2-1B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-10/adapter_config.json b/checkpoints/checkpoint-10/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a0d31afa2216e2d6d2237fc15eef0c8c03ca674 --- /dev/null +++ b/checkpoints/checkpoint-10/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.2-1B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-10/adapter_model.safetensors b/checkpoints/checkpoint-10/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..61e4d96ba68eccb0fd4d497dc99ab507f57d3222 --- /dev/null +++ b/checkpoints/checkpoint-10/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48ad18b3d2ad487d485d4f20069e8dac55484f3184fa342045ddf793262dbff4 +size 41248 diff --git a/checkpoints/checkpoint-10/chat_template.jinja b/checkpoints/checkpoint-10/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..1bad6a0f648dccdbec523ca79ba90fbcfc806af0 --- /dev/null +++ b/checkpoints/checkpoint-10/chat_template.jinja @@ -0,0 +1,93 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- if strftime_now is defined %} + {%- set date_string = strftime_now("%d %b %Y") %} + {%- else %} + {%- set date_string = "26 Jul 2024" %} + {%- endif %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {{- "<|eot_id|>" }} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-10/optimizer.pt b/checkpoints/checkpoint-10/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..7a33d04929d65fde370716bd76f9677ea21a7e8f --- /dev/null +++ b/checkpoints/checkpoint-10/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4acd87c38c9ec02fb6a5ae6cd704a83d4ef50e33e22813dc83be01d054e3f46 +size 38889 diff --git a/checkpoints/checkpoint-10/rng_state.pth b/checkpoints/checkpoint-10/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..449028c1ecc80657ae434a40b29f1f2bdccb9195 --- /dev/null +++ b/checkpoints/checkpoint-10/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12e15e837284f30841feeb4cb11a4ca47e6e0a0d43907e64044c865959176390 +size 14581 diff --git a/checkpoints/checkpoint-10/scheduler.pt b/checkpoints/checkpoint-10/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..4ee408506b56aa6f7290a6b3ecb2c8ec7ec80402 --- /dev/null +++ b/checkpoints/checkpoint-10/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f80fb384c7c6b2fc4b769cc3ff7aa3e3b89127590ab5ff19e1d709da4b70c9ca +size 1465 diff --git a/checkpoints/checkpoint-10/special_tokens_map.json b/checkpoints/checkpoint-10/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-10/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-10/tokenizer.json b/checkpoints/checkpoint-10/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-10/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-10/tokenizer_config.json b/checkpoints/checkpoint-10/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-10/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-10/trainer_state.json b/checkpoints/checkpoint-10/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e41e6bd7d707ae9976bd0cebbe98c73d8b8c8949 --- /dev/null +++ b/checkpoints/checkpoint-10/trainer_state.json @@ -0,0 +1,104 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.025220680958385876, + "eval_steps": 100, + "global_step": 10, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 25.350475311279297, + "learning_rate": 0.0, + "loss": 2.5568, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 24.538068771362305, + "learning_rate": 4.000000000000001e-06, + "loss": 2.7748, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 23.780784606933594, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5911, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 24.780380249023438, + "learning_rate": 1.2e-05, + "loss": 2.8427, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 22.699949264526367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.709, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 22.106008529663086, + "learning_rate": 2e-05, + "loss": 2.5854, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 22.497045516967773, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.5427, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 27.103275299072266, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.6599, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 21.081985473632812, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.5145, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 25.964981079101562, + "learning_rate": 1.979591836734694e-05, + "loss": 2.4247, + "step": 10 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 145401236865024.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-10/training_args.bin b/checkpoints/checkpoint-10/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..866f8d7e39db21daf30a11de7c14a3ccb8f2e9aa --- /dev/null +++ b/checkpoints/checkpoint-10/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:591380c2089627b1aa728bfd52abdb50afeaafc7a7f48353dede4e443fc370d0 +size 5969 diff --git a/checkpoints/checkpoint-100/README.md b/checkpoints/checkpoint-100/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b974a935220f493e52a3da346206a652479b4735 --- /dev/null +++ b/checkpoints/checkpoint-100/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.2-1B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-100/adapter_config.json b/checkpoints/checkpoint-100/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a0d31afa2216e2d6d2237fc15eef0c8c03ca674 --- /dev/null +++ b/checkpoints/checkpoint-100/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.2-1B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-100/adapter_model.safetensors b/checkpoints/checkpoint-100/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bfc257fdbbe57bd54fe8da205f78b37afaf6ebdd --- /dev/null +++ b/checkpoints/checkpoint-100/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3532a28ed3a397164cb87a16d43a6ed2372c795178ac37b371d654f66481b88 +size 41248 diff --git a/checkpoints/checkpoint-100/chat_template.jinja b/checkpoints/checkpoint-100/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..1bad6a0f648dccdbec523ca79ba90fbcfc806af0 --- /dev/null +++ b/checkpoints/checkpoint-100/chat_template.jinja @@ -0,0 +1,93 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- if strftime_now is defined %} + {%- set date_string = strftime_now("%d %b %Y") %} + {%- else %} + {%- set date_string = "26 Jul 2024" %} + {%- endif %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {{- "<|eot_id|>" }} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-100/optimizer.pt b/checkpoints/checkpoint-100/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..f96c701bbb2ca197a04fbed4e3599cf3f919ad00 --- /dev/null +++ b/checkpoints/checkpoint-100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3657625920577b4f878af998b58895c1a297a7c1bd5f32d1bd36079093a83ec8 +size 38889 diff --git a/checkpoints/checkpoint-100/rng_state.pth b/checkpoints/checkpoint-100/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..85d681a34283ea87d8ee31827a25caa7feffbf71 --- /dev/null +++ b/checkpoints/checkpoint-100/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cde881b24af86d8870de73e5328bf8754c9e262c58bcd875cf451eca0e6671d5 +size 14581 diff --git a/checkpoints/checkpoint-100/scheduler.pt b/checkpoints/checkpoint-100/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..1a2dc200a29e89a655f4825e0c79149c8a1651ce --- /dev/null +++ b/checkpoints/checkpoint-100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6702d7b06eb26cdff4035f420aa5101b754c0723ec91a902ea6577e82c1ee7c +size 1465 diff --git a/checkpoints/checkpoint-100/special_tokens_map.json b/checkpoints/checkpoint-100/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-100/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-100/tokenizer.json b/checkpoints/checkpoint-100/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-100/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-100/tokenizer_config.json b/checkpoints/checkpoint-100/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-100/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-100/trainer_state.json b/checkpoints/checkpoint-100/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6e539345bdd49a52f8ad59ed93670bc0b89be4c0 --- /dev/null +++ b/checkpoints/checkpoint-100/trainer_state.json @@ -0,0 +1,742 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.25220680958385877, + "eval_steps": 100, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 25.350475311279297, + "learning_rate": 0.0, + "loss": 2.5568, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 24.538068771362305, + "learning_rate": 4.000000000000001e-06, + "loss": 2.7748, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 23.780784606933594, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5911, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 24.780380249023438, + "learning_rate": 1.2e-05, + "loss": 2.8427, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 22.699949264526367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.709, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 22.106008529663086, + "learning_rate": 2e-05, + "loss": 2.5854, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 22.497045516967773, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.5427, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 27.103275299072266, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.6599, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 21.081985473632812, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.5145, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 25.964981079101562, + "learning_rate": 1.979591836734694e-05, + "loss": 2.4247, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 25.353195190429688, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.5092, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 18.94191551208496, + "learning_rate": 1.969387755102041e-05, + "loss": 2.4335, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 23.60140037536621, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.544, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 24.298965454101562, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.4987, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.745506286621094, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.4985, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 22.54330062866211, + "learning_rate": 1.948979591836735e-05, + "loss": 2.6892, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 21.46229362487793, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.3998, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 20.54530143737793, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.4244, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 18.8839111328125, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.3911, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.924652099609375, + "learning_rate": 1.928571428571429e-05, + "loss": 2.3588, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.996627807617188, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.3727, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 18.584613800048828, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2974, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 14.309200286865234, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.4843, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.074164390563965, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.4043, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 13.610542297363281, + "learning_rate": 1.903061224489796e-05, + "loss": 2.3762, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 15.666613578796387, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.3249, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 14.475164413452148, + "learning_rate": 1.892857142857143e-05, + "loss": 2.3317, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 16.231687545776367, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.5064, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 16.8968563079834, + "learning_rate": 1.88265306122449e-05, + "loss": 2.3932, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 17.74305534362793, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.3329, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 16.41620445251465, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.3982, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 17.965959548950195, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.4227, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 19.92589569091797, + "learning_rate": 1.862244897959184e-05, + "loss": 2.5255, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 20.62932586669922, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.1816, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 18.360614776611328, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.2827, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 19.199546813964844, + "learning_rate": 1.8469387755102043e-05, + "loss": 2.1498, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 22.727521896362305, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.3811, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 19.80649757385254, + "learning_rate": 1.836734693877551e-05, + "loss": 2.2342, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.24563217163086, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.2287, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 25.384042739868164, + "learning_rate": 1.826530612244898e-05, + "loss": 2.1259, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 23.417089462280273, + "learning_rate": 1.8214285714285715e-05, + "loss": 2.0858, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 27.639497756958008, + "learning_rate": 1.816326530612245e-05, + "loss": 2.2243, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 27.390850067138672, + "learning_rate": 1.8112244897959187e-05, + "loss": 2.1314, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 27.956937789916992, + "learning_rate": 1.806122448979592e-05, + "loss": 2.1755, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 32.09632873535156, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.2365, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 33.84647750854492, + "learning_rate": 1.795918367346939e-05, + "loss": 2.1671, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 32.027130126953125, + "learning_rate": 1.7908163265306123e-05, + "loss": 2.09, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 35.423587799072266, + "learning_rate": 1.785714285714286e-05, + "loss": 2.2479, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 31.041240692138672, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9687, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 28.790103912353516, + "learning_rate": 1.7755102040816327e-05, + "loss": 2.1428, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.089313507080078, + "learning_rate": 1.7704081632653062e-05, + "loss": 2.0673, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 26.493867874145508, + "learning_rate": 1.7653061224489798e-05, + "loss": 2.0814, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 19.993173599243164, + "learning_rate": 1.760204081632653e-05, + "loss": 2.005, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 21.89765167236328, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.2262, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 23.22844123840332, + "learning_rate": 1.7500000000000002e-05, + "loss": 2.0208, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 15.864526748657227, + "learning_rate": 1.7448979591836738e-05, + "loss": 2.0153, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 21.451187133789062, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.1386, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 18.089811325073242, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.9517, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 24.029157638549805, + "learning_rate": 1.729591836734694e-05, + "loss": 1.9719, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 18.722776412963867, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0623, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 20.211933135986328, + "learning_rate": 1.719387755102041e-05, + "loss": 2.0081, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 17.61188507080078, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.8484, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 20.118955612182617, + "learning_rate": 1.7091836734693878e-05, + "loss": 2.0799, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 17.271841049194336, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.9832, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 19.521392822265625, + "learning_rate": 1.698979591836735e-05, + "loss": 1.9129, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 22.660900115966797, + "learning_rate": 1.6938775510204085e-05, + "loss": 2.118, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 17.332427978515625, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.9632, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 22.42765998840332, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.954, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 23.6208553314209, + "learning_rate": 1.678571428571429e-05, + "loss": 1.9917, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 19.78505516052246, + "learning_rate": 1.673469387755102e-05, + "loss": 1.7964, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 19.453041076660156, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.9587, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 24.731407165527344, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.9945, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 20.977611541748047, + "learning_rate": 1.6581632653061225e-05, + "loss": 2.0617, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 22.959585189819336, + "learning_rate": 1.653061224489796e-05, + "loss": 1.98, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 21.952653884887695, + "learning_rate": 1.6479591836734696e-05, + "loss": 2.1094, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 22.320383071899414, + "learning_rate": 1.642857142857143e-05, + "loss": 1.8418, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 24.375411987304688, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.8428, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 19.64323616027832, + "learning_rate": 1.63265306122449e-05, + "loss": 1.9194, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 22.459064483642578, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.6649, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 36.789764404296875, + "learning_rate": 1.6224489795918368e-05, + "loss": 2.0131, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 22.109119415283203, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.9603, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 19.196834564208984, + "learning_rate": 1.612244897959184e-05, + "loss": 2.0538, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 26.870800018310547, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.9168, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 35.190696716308594, + "learning_rate": 1.6020408163265308e-05, + "loss": 2.0149, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 19.963472366333008, + "learning_rate": 1.596938775510204e-05, + "loss": 1.7871, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 20.292407989501953, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.944, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 20.55329132080078, + "learning_rate": 1.586734693877551e-05, + "loss": 2.0175, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 17.27350616455078, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.9308, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 22.471134185791016, + "learning_rate": 1.576530612244898e-05, + "loss": 1.9221, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 25.098316192626953, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.9359, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 25.125213623046875, + "learning_rate": 1.566326530612245e-05, + "loss": 2.0087, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 20.038599014282227, + "learning_rate": 1.5612244897959187e-05, + "loss": 2.0939, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 19.016841888427734, + "learning_rate": 1.556122448979592e-05, + "loss": 2.0183, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 21.97820472717285, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.8239, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 25.578901290893555, + "learning_rate": 1.545918367346939e-05, + "loss": 1.9388, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 23.74614143371582, + "learning_rate": 1.5408163265306123e-05, + "loss": 2.0492, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 22.203304290771484, + "learning_rate": 1.535714285714286e-05, + "loss": 1.941, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 21.39324188232422, + "learning_rate": 1.530612244897959e-05, + "loss": 1.9042, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 18.99315643310547, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.88, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 24.22341537475586, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.8147, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.8966256380081177, + "eval_runtime": 6.9787, + "eval_samples_per_second": 101.022, + "eval_steps_per_second": 50.583, + "step": 100 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1449025915502592.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-100/training_args.bin b/checkpoints/checkpoint-100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..866f8d7e39db21daf30a11de7c14a3ccb8f2e9aa --- /dev/null +++ b/checkpoints/checkpoint-100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:591380c2089627b1aa728bfd52abdb50afeaafc7a7f48353dede4e443fc370d0 +size 5969 diff --git a/checkpoints/checkpoint-110/README.md b/checkpoints/checkpoint-110/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b974a935220f493e52a3da346206a652479b4735 --- /dev/null +++ b/checkpoints/checkpoint-110/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.2-1B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-110/adapter_config.json b/checkpoints/checkpoint-110/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a0d31afa2216e2d6d2237fc15eef0c8c03ca674 --- /dev/null +++ b/checkpoints/checkpoint-110/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.2-1B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-110/adapter_model.safetensors b/checkpoints/checkpoint-110/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9a4f84fe49b3ad95b124699b40bd0b40a62ea82e --- /dev/null +++ b/checkpoints/checkpoint-110/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0161c441fece0cbcf7baf2df5f3fe50058abe4ed0a739b6d2cb31cf021b861c +size 41248 diff --git a/checkpoints/checkpoint-110/chat_template.jinja b/checkpoints/checkpoint-110/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..1bad6a0f648dccdbec523ca79ba90fbcfc806af0 --- /dev/null +++ b/checkpoints/checkpoint-110/chat_template.jinja @@ -0,0 +1,93 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- if strftime_now is defined %} + {%- set date_string = strftime_now("%d %b %Y") %} + {%- else %} + {%- set date_string = "26 Jul 2024" %} + {%- endif %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {{- "<|eot_id|>" }} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-110/optimizer.pt b/checkpoints/checkpoint-110/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..dabf5bebd90d2a1ec5319567229f0d47c59aea83 --- /dev/null +++ b/checkpoints/checkpoint-110/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a39e935d6aeecd7b2f705364746a9af38b0478bfc581f30168cb2b4f89692183 +size 38889 diff --git a/checkpoints/checkpoint-110/rng_state.pth b/checkpoints/checkpoint-110/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..85d681a34283ea87d8ee31827a25caa7feffbf71 --- /dev/null +++ b/checkpoints/checkpoint-110/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cde881b24af86d8870de73e5328bf8754c9e262c58bcd875cf451eca0e6671d5 +size 14581 diff --git a/checkpoints/checkpoint-110/scheduler.pt b/checkpoints/checkpoint-110/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b50f5d13ca7b80bdb9c6487ad3fb04d269d6e7d3 --- /dev/null +++ b/checkpoints/checkpoint-110/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c579670112e91f1ab565d1c7f636a9a46e1705f4efd7df06429ef02890e532f0 +size 1465 diff --git a/checkpoints/checkpoint-110/special_tokens_map.json b/checkpoints/checkpoint-110/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-110/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-110/tokenizer.json b/checkpoints/checkpoint-110/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-110/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-110/tokenizer_config.json b/checkpoints/checkpoint-110/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-110/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-110/trainer_state.json b/checkpoints/checkpoint-110/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..488a57c860fa6e2b26741b53bf54f6491cea4432 --- /dev/null +++ b/checkpoints/checkpoint-110/trainer_state.json @@ -0,0 +1,812 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.27742749054224464, + "eval_steps": 100, + "global_step": 110, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 25.350475311279297, + "learning_rate": 0.0, + "loss": 2.5568, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 24.538068771362305, + "learning_rate": 4.000000000000001e-06, + "loss": 2.7748, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 23.780784606933594, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5911, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 24.780380249023438, + "learning_rate": 1.2e-05, + "loss": 2.8427, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 22.699949264526367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.709, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 22.106008529663086, + "learning_rate": 2e-05, + "loss": 2.5854, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 22.497045516967773, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.5427, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 27.103275299072266, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.6599, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 21.081985473632812, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.5145, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 25.964981079101562, + "learning_rate": 1.979591836734694e-05, + "loss": 2.4247, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 25.353195190429688, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.5092, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 18.94191551208496, + "learning_rate": 1.969387755102041e-05, + "loss": 2.4335, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 23.60140037536621, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.544, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 24.298965454101562, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.4987, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.745506286621094, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.4985, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 22.54330062866211, + "learning_rate": 1.948979591836735e-05, + "loss": 2.6892, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 21.46229362487793, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.3998, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 20.54530143737793, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.4244, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 18.8839111328125, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.3911, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.924652099609375, + "learning_rate": 1.928571428571429e-05, + "loss": 2.3588, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.996627807617188, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.3727, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 18.584613800048828, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2974, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 14.309200286865234, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.4843, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.074164390563965, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.4043, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 13.610542297363281, + "learning_rate": 1.903061224489796e-05, + "loss": 2.3762, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 15.666613578796387, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.3249, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 14.475164413452148, + "learning_rate": 1.892857142857143e-05, + "loss": 2.3317, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 16.231687545776367, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.5064, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 16.8968563079834, + "learning_rate": 1.88265306122449e-05, + "loss": 2.3932, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 17.74305534362793, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.3329, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 16.41620445251465, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.3982, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 17.965959548950195, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.4227, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 19.92589569091797, + "learning_rate": 1.862244897959184e-05, + "loss": 2.5255, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 20.62932586669922, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.1816, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 18.360614776611328, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.2827, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 19.199546813964844, + "learning_rate": 1.8469387755102043e-05, + "loss": 2.1498, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 22.727521896362305, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.3811, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 19.80649757385254, + "learning_rate": 1.836734693877551e-05, + "loss": 2.2342, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.24563217163086, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.2287, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 25.384042739868164, + "learning_rate": 1.826530612244898e-05, + "loss": 2.1259, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 23.417089462280273, + "learning_rate": 1.8214285714285715e-05, + "loss": 2.0858, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 27.639497756958008, + "learning_rate": 1.816326530612245e-05, + "loss": 2.2243, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 27.390850067138672, + "learning_rate": 1.8112244897959187e-05, + "loss": 2.1314, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 27.956937789916992, + "learning_rate": 1.806122448979592e-05, + "loss": 2.1755, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 32.09632873535156, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.2365, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 33.84647750854492, + "learning_rate": 1.795918367346939e-05, + "loss": 2.1671, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 32.027130126953125, + "learning_rate": 1.7908163265306123e-05, + "loss": 2.09, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 35.423587799072266, + "learning_rate": 1.785714285714286e-05, + "loss": 2.2479, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 31.041240692138672, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9687, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 28.790103912353516, + "learning_rate": 1.7755102040816327e-05, + "loss": 2.1428, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.089313507080078, + "learning_rate": 1.7704081632653062e-05, + "loss": 2.0673, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 26.493867874145508, + "learning_rate": 1.7653061224489798e-05, + "loss": 2.0814, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 19.993173599243164, + "learning_rate": 1.760204081632653e-05, + "loss": 2.005, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 21.89765167236328, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.2262, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 23.22844123840332, + "learning_rate": 1.7500000000000002e-05, + "loss": 2.0208, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 15.864526748657227, + "learning_rate": 1.7448979591836738e-05, + "loss": 2.0153, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 21.451187133789062, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.1386, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 18.089811325073242, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.9517, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 24.029157638549805, + "learning_rate": 1.729591836734694e-05, + "loss": 1.9719, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 18.722776412963867, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0623, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 20.211933135986328, + "learning_rate": 1.719387755102041e-05, + "loss": 2.0081, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 17.61188507080078, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.8484, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 20.118955612182617, + "learning_rate": 1.7091836734693878e-05, + "loss": 2.0799, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 17.271841049194336, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.9832, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 19.521392822265625, + "learning_rate": 1.698979591836735e-05, + "loss": 1.9129, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 22.660900115966797, + "learning_rate": 1.6938775510204085e-05, + "loss": 2.118, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 17.332427978515625, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.9632, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 22.42765998840332, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.954, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 23.6208553314209, + "learning_rate": 1.678571428571429e-05, + "loss": 1.9917, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 19.78505516052246, + "learning_rate": 1.673469387755102e-05, + "loss": 1.7964, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 19.453041076660156, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.9587, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 24.731407165527344, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.9945, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 20.977611541748047, + "learning_rate": 1.6581632653061225e-05, + "loss": 2.0617, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 22.959585189819336, + "learning_rate": 1.653061224489796e-05, + "loss": 1.98, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 21.952653884887695, + "learning_rate": 1.6479591836734696e-05, + "loss": 2.1094, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 22.320383071899414, + "learning_rate": 1.642857142857143e-05, + "loss": 1.8418, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 24.375411987304688, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.8428, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 19.64323616027832, + "learning_rate": 1.63265306122449e-05, + "loss": 1.9194, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 22.459064483642578, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.6649, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 36.789764404296875, + "learning_rate": 1.6224489795918368e-05, + "loss": 2.0131, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 22.109119415283203, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.9603, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 19.196834564208984, + "learning_rate": 1.612244897959184e-05, + "loss": 2.0538, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 26.870800018310547, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.9168, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 35.190696716308594, + "learning_rate": 1.6020408163265308e-05, + "loss": 2.0149, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 19.963472366333008, + "learning_rate": 1.596938775510204e-05, + "loss": 1.7871, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 20.292407989501953, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.944, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 20.55329132080078, + "learning_rate": 1.586734693877551e-05, + "loss": 2.0175, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 17.27350616455078, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.9308, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 22.471134185791016, + "learning_rate": 1.576530612244898e-05, + "loss": 1.9221, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 25.098316192626953, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.9359, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 25.125213623046875, + "learning_rate": 1.566326530612245e-05, + "loss": 2.0087, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 20.038599014282227, + "learning_rate": 1.5612244897959187e-05, + "loss": 2.0939, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 19.016841888427734, + "learning_rate": 1.556122448979592e-05, + "loss": 2.0183, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 21.97820472717285, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.8239, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 25.578901290893555, + "learning_rate": 1.545918367346939e-05, + "loss": 1.9388, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 23.74614143371582, + "learning_rate": 1.5408163265306123e-05, + "loss": 2.0492, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 22.203304290771484, + "learning_rate": 1.535714285714286e-05, + "loss": 1.941, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 21.39324188232422, + "learning_rate": 1.530612244897959e-05, + "loss": 1.9042, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 18.99315643310547, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.88, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 24.22341537475586, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.8147, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.8966256380081177, + "eval_runtime": 6.9787, + "eval_samples_per_second": 101.022, + "eval_steps_per_second": 50.583, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 18.296152114868164, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.8605, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 26.404766082763672, + "learning_rate": 1.510204081632653e-05, + "loss": 2.1195, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 19.187122344970703, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.9284, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 20.79934310913086, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.725, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 23.833288192749023, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.9215, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 22.301727294921875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.8728, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 23.685596466064453, + "learning_rate": 1.4846938775510204e-05, + "loss": 2.0482, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 18.969186782836914, + "learning_rate": 1.479591836734694e-05, + "loss": 1.8724, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 23.994483947753906, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.9542, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 16.84621238708496, + "learning_rate": 1.469387755102041e-05, + "loss": 1.9703, + "step": 110 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1596832794869760.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-110/training_args.bin b/checkpoints/checkpoint-110/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..866f8d7e39db21daf30a11de7c14a3ccb8f2e9aa --- /dev/null +++ b/checkpoints/checkpoint-110/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:591380c2089627b1aa728bfd52abdb50afeaafc7a7f48353dede4e443fc370d0 +size 5969 diff --git a/checkpoints/checkpoint-120/README.md b/checkpoints/checkpoint-120/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b974a935220f493e52a3da346206a652479b4735 --- /dev/null +++ b/checkpoints/checkpoint-120/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.2-1B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-120/adapter_config.json b/checkpoints/checkpoint-120/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a0d31afa2216e2d6d2237fc15eef0c8c03ca674 --- /dev/null +++ b/checkpoints/checkpoint-120/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.2-1B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-120/adapter_model.safetensors b/checkpoints/checkpoint-120/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6d6a9faf283c09886592e46eb7980545d1e7b0c6 --- /dev/null +++ b/checkpoints/checkpoint-120/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0914aca527eb58b66b31b9cd693de008871177cb944808599c206063115f6b4a +size 41248 diff --git a/checkpoints/checkpoint-120/chat_template.jinja b/checkpoints/checkpoint-120/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..1bad6a0f648dccdbec523ca79ba90fbcfc806af0 --- /dev/null +++ b/checkpoints/checkpoint-120/chat_template.jinja @@ -0,0 +1,93 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- if strftime_now is defined %} + {%- set date_string = strftime_now("%d %b %Y") %} + {%- else %} + {%- set date_string = "26 Jul 2024" %} + {%- endif %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {{- "<|eot_id|>" }} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-120/optimizer.pt b/checkpoints/checkpoint-120/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..431bf555d32d0e2dbd55caea258bdc8b4cfac037 --- /dev/null +++ b/checkpoints/checkpoint-120/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e25911dab5bfff40d33c2a7584dc27878adf82d9485a628374700bbb012e9df8 +size 38889 diff --git a/checkpoints/checkpoint-120/rng_state.pth b/checkpoints/checkpoint-120/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..85d681a34283ea87d8ee31827a25caa7feffbf71 --- /dev/null +++ b/checkpoints/checkpoint-120/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cde881b24af86d8870de73e5328bf8754c9e262c58bcd875cf451eca0e6671d5 +size 14581 diff --git a/checkpoints/checkpoint-120/scheduler.pt b/checkpoints/checkpoint-120/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..7b180ad2cdd33e489dde6b95e595884514dcdf2e --- /dev/null +++ b/checkpoints/checkpoint-120/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f837a3c6fcb33b37029da4c29789a9428e4c4b51508d6cf78aac3e04bd26b5d +size 1465 diff --git a/checkpoints/checkpoint-120/special_tokens_map.json b/checkpoints/checkpoint-120/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-120/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-120/tokenizer.json b/checkpoints/checkpoint-120/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-120/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-120/tokenizer_config.json b/checkpoints/checkpoint-120/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-120/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-120/trainer_state.json b/checkpoints/checkpoint-120/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..576718dd92e2103607744de39e4218e4a2e734a4 --- /dev/null +++ b/checkpoints/checkpoint-120/trainer_state.json @@ -0,0 +1,882 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.3026481715006305, + "eval_steps": 100, + "global_step": 120, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 25.350475311279297, + "learning_rate": 0.0, + "loss": 2.5568, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 24.538068771362305, + "learning_rate": 4.000000000000001e-06, + "loss": 2.7748, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 23.780784606933594, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5911, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 24.780380249023438, + "learning_rate": 1.2e-05, + "loss": 2.8427, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 22.699949264526367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.709, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 22.106008529663086, + "learning_rate": 2e-05, + "loss": 2.5854, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 22.497045516967773, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.5427, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 27.103275299072266, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.6599, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 21.081985473632812, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.5145, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 25.964981079101562, + "learning_rate": 1.979591836734694e-05, + "loss": 2.4247, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 25.353195190429688, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.5092, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 18.94191551208496, + "learning_rate": 1.969387755102041e-05, + "loss": 2.4335, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 23.60140037536621, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.544, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 24.298965454101562, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.4987, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.745506286621094, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.4985, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 22.54330062866211, + "learning_rate": 1.948979591836735e-05, + "loss": 2.6892, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 21.46229362487793, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.3998, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 20.54530143737793, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.4244, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 18.8839111328125, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.3911, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.924652099609375, + "learning_rate": 1.928571428571429e-05, + "loss": 2.3588, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.996627807617188, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.3727, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 18.584613800048828, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2974, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 14.309200286865234, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.4843, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.074164390563965, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.4043, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 13.610542297363281, + "learning_rate": 1.903061224489796e-05, + "loss": 2.3762, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 15.666613578796387, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.3249, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 14.475164413452148, + "learning_rate": 1.892857142857143e-05, + "loss": 2.3317, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 16.231687545776367, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.5064, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 16.8968563079834, + "learning_rate": 1.88265306122449e-05, + "loss": 2.3932, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 17.74305534362793, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.3329, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 16.41620445251465, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.3982, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 17.965959548950195, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.4227, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 19.92589569091797, + "learning_rate": 1.862244897959184e-05, + "loss": 2.5255, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 20.62932586669922, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.1816, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 18.360614776611328, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.2827, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 19.199546813964844, + "learning_rate": 1.8469387755102043e-05, + "loss": 2.1498, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 22.727521896362305, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.3811, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 19.80649757385254, + "learning_rate": 1.836734693877551e-05, + "loss": 2.2342, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.24563217163086, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.2287, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 25.384042739868164, + "learning_rate": 1.826530612244898e-05, + "loss": 2.1259, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 23.417089462280273, + "learning_rate": 1.8214285714285715e-05, + "loss": 2.0858, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 27.639497756958008, + "learning_rate": 1.816326530612245e-05, + "loss": 2.2243, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 27.390850067138672, + "learning_rate": 1.8112244897959187e-05, + "loss": 2.1314, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 27.956937789916992, + "learning_rate": 1.806122448979592e-05, + "loss": 2.1755, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 32.09632873535156, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.2365, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 33.84647750854492, + "learning_rate": 1.795918367346939e-05, + "loss": 2.1671, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 32.027130126953125, + "learning_rate": 1.7908163265306123e-05, + "loss": 2.09, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 35.423587799072266, + "learning_rate": 1.785714285714286e-05, + "loss": 2.2479, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 31.041240692138672, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9687, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 28.790103912353516, + "learning_rate": 1.7755102040816327e-05, + "loss": 2.1428, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.089313507080078, + "learning_rate": 1.7704081632653062e-05, + "loss": 2.0673, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 26.493867874145508, + "learning_rate": 1.7653061224489798e-05, + "loss": 2.0814, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 19.993173599243164, + "learning_rate": 1.760204081632653e-05, + "loss": 2.005, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 21.89765167236328, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.2262, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 23.22844123840332, + "learning_rate": 1.7500000000000002e-05, + "loss": 2.0208, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 15.864526748657227, + "learning_rate": 1.7448979591836738e-05, + "loss": 2.0153, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 21.451187133789062, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.1386, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 18.089811325073242, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.9517, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 24.029157638549805, + "learning_rate": 1.729591836734694e-05, + "loss": 1.9719, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 18.722776412963867, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0623, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 20.211933135986328, + "learning_rate": 1.719387755102041e-05, + "loss": 2.0081, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 17.61188507080078, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.8484, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 20.118955612182617, + "learning_rate": 1.7091836734693878e-05, + "loss": 2.0799, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 17.271841049194336, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.9832, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 19.521392822265625, + "learning_rate": 1.698979591836735e-05, + "loss": 1.9129, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 22.660900115966797, + "learning_rate": 1.6938775510204085e-05, + "loss": 2.118, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 17.332427978515625, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.9632, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 22.42765998840332, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.954, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 23.6208553314209, + "learning_rate": 1.678571428571429e-05, + "loss": 1.9917, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 19.78505516052246, + "learning_rate": 1.673469387755102e-05, + "loss": 1.7964, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 19.453041076660156, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.9587, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 24.731407165527344, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.9945, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 20.977611541748047, + "learning_rate": 1.6581632653061225e-05, + "loss": 2.0617, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 22.959585189819336, + "learning_rate": 1.653061224489796e-05, + "loss": 1.98, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 21.952653884887695, + "learning_rate": 1.6479591836734696e-05, + "loss": 2.1094, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 22.320383071899414, + "learning_rate": 1.642857142857143e-05, + "loss": 1.8418, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 24.375411987304688, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.8428, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 19.64323616027832, + "learning_rate": 1.63265306122449e-05, + "loss": 1.9194, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 22.459064483642578, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.6649, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 36.789764404296875, + "learning_rate": 1.6224489795918368e-05, + "loss": 2.0131, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 22.109119415283203, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.9603, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 19.196834564208984, + "learning_rate": 1.612244897959184e-05, + "loss": 2.0538, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 26.870800018310547, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.9168, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 35.190696716308594, + "learning_rate": 1.6020408163265308e-05, + "loss": 2.0149, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 19.963472366333008, + "learning_rate": 1.596938775510204e-05, + "loss": 1.7871, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 20.292407989501953, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.944, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 20.55329132080078, + "learning_rate": 1.586734693877551e-05, + "loss": 2.0175, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 17.27350616455078, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.9308, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 22.471134185791016, + "learning_rate": 1.576530612244898e-05, + "loss": 1.9221, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 25.098316192626953, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.9359, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 25.125213623046875, + "learning_rate": 1.566326530612245e-05, + "loss": 2.0087, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 20.038599014282227, + "learning_rate": 1.5612244897959187e-05, + "loss": 2.0939, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 19.016841888427734, + "learning_rate": 1.556122448979592e-05, + "loss": 2.0183, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 21.97820472717285, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.8239, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 25.578901290893555, + "learning_rate": 1.545918367346939e-05, + "loss": 1.9388, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 23.74614143371582, + "learning_rate": 1.5408163265306123e-05, + "loss": 2.0492, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 22.203304290771484, + "learning_rate": 1.535714285714286e-05, + "loss": 1.941, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 21.39324188232422, + "learning_rate": 1.530612244897959e-05, + "loss": 1.9042, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 18.99315643310547, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.88, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 24.22341537475586, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.8147, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.8966256380081177, + "eval_runtime": 6.9787, + "eval_samples_per_second": 101.022, + "eval_steps_per_second": 50.583, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 18.296152114868164, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.8605, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 26.404766082763672, + "learning_rate": 1.510204081632653e-05, + "loss": 2.1195, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 19.187122344970703, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.9284, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 20.79934310913086, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.725, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 23.833288192749023, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.9215, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 22.301727294921875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.8728, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 23.685596466064453, + "learning_rate": 1.4846938775510204e-05, + "loss": 2.0482, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 18.969186782836914, + "learning_rate": 1.479591836734694e-05, + "loss": 1.8724, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 23.994483947753906, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.9542, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 16.84621238708496, + "learning_rate": 1.469387755102041e-05, + "loss": 1.9703, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 23.411087036132812, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.9836, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 29.55487632751465, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.9124, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 32.28921127319336, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.8566, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 24.007558822631836, + "learning_rate": 1.448979591836735e-05, + "loss": 1.8296, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 26.753524780273438, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.7181, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 22.49270248413086, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.8741, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 28.006656646728516, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.9151, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 17.606775283813477, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.9654, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 29.94802474975586, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.8849, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 28.27743148803711, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.8006, + "step": 120 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1741626782171136.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-120/training_args.bin b/checkpoints/checkpoint-120/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..866f8d7e39db21daf30a11de7c14a3ccb8f2e9aa --- /dev/null +++ b/checkpoints/checkpoint-120/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:591380c2089627b1aa728bfd52abdb50afeaafc7a7f48353dede4e443fc370d0 +size 5969 diff --git a/checkpoints/checkpoint-130/README.md b/checkpoints/checkpoint-130/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b974a935220f493e52a3da346206a652479b4735 --- /dev/null +++ b/checkpoints/checkpoint-130/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.2-1B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-130/adapter_config.json b/checkpoints/checkpoint-130/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a0d31afa2216e2d6d2237fc15eef0c8c03ca674 --- /dev/null +++ b/checkpoints/checkpoint-130/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.2-1B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-130/adapter_model.safetensors b/checkpoints/checkpoint-130/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a3a2fce41dece9458ec3066832f73c5c7f79e0ef --- /dev/null +++ b/checkpoints/checkpoint-130/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5609f1e6854d6b12c65cd2759021fa74039ee98994751e8bb030e1bdef5503da +size 41248 diff --git a/checkpoints/checkpoint-130/chat_template.jinja b/checkpoints/checkpoint-130/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..1bad6a0f648dccdbec523ca79ba90fbcfc806af0 --- /dev/null +++ b/checkpoints/checkpoint-130/chat_template.jinja @@ -0,0 +1,93 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- if strftime_now is defined %} + {%- set date_string = strftime_now("%d %b %Y") %} + {%- else %} + {%- set date_string = "26 Jul 2024" %} + {%- endif %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {{- "<|eot_id|>" }} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-130/optimizer.pt b/checkpoints/checkpoint-130/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..921fa4877d0a9e8024f97101d3250317c63dfa82 --- /dev/null +++ b/checkpoints/checkpoint-130/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b24eaa86a17d73184a80467be3afe9216e8c00e9885c242aa4f470ad9a82595a +size 38889 diff --git a/checkpoints/checkpoint-130/rng_state.pth b/checkpoints/checkpoint-130/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..85d681a34283ea87d8ee31827a25caa7feffbf71 --- /dev/null +++ b/checkpoints/checkpoint-130/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cde881b24af86d8870de73e5328bf8754c9e262c58bcd875cf451eca0e6671d5 +size 14581 diff --git a/checkpoints/checkpoint-130/scheduler.pt b/checkpoints/checkpoint-130/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..41508daf3d9118f3aee6b5e02c54c4c7f3122f54 --- /dev/null +++ b/checkpoints/checkpoint-130/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2940337cf97010db8176d334bcbb9aa0df5ae47aa1f960bdef2469f602d5f30 +size 1465 diff --git a/checkpoints/checkpoint-130/special_tokens_map.json b/checkpoints/checkpoint-130/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-130/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-130/tokenizer.json b/checkpoints/checkpoint-130/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-130/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-130/tokenizer_config.json b/checkpoints/checkpoint-130/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-130/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-130/trainer_state.json b/checkpoints/checkpoint-130/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..8abd4f67297eb960d07e33178b392789bbb0b33a --- /dev/null +++ b/checkpoints/checkpoint-130/trainer_state.json @@ -0,0 +1,952 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.32786885245901637, + "eval_steps": 100, + "global_step": 130, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 25.350475311279297, + "learning_rate": 0.0, + "loss": 2.5568, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 24.538068771362305, + "learning_rate": 4.000000000000001e-06, + "loss": 2.7748, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 23.780784606933594, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5911, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 24.780380249023438, + "learning_rate": 1.2e-05, + "loss": 2.8427, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 22.699949264526367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.709, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 22.106008529663086, + "learning_rate": 2e-05, + "loss": 2.5854, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 22.497045516967773, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.5427, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 27.103275299072266, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.6599, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 21.081985473632812, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.5145, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 25.964981079101562, + "learning_rate": 1.979591836734694e-05, + "loss": 2.4247, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 25.353195190429688, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.5092, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 18.94191551208496, + "learning_rate": 1.969387755102041e-05, + "loss": 2.4335, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 23.60140037536621, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.544, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 24.298965454101562, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.4987, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.745506286621094, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.4985, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 22.54330062866211, + "learning_rate": 1.948979591836735e-05, + "loss": 2.6892, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 21.46229362487793, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.3998, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 20.54530143737793, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.4244, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 18.8839111328125, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.3911, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.924652099609375, + "learning_rate": 1.928571428571429e-05, + "loss": 2.3588, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.996627807617188, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.3727, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 18.584613800048828, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2974, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 14.309200286865234, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.4843, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.074164390563965, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.4043, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 13.610542297363281, + "learning_rate": 1.903061224489796e-05, + "loss": 2.3762, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 15.666613578796387, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.3249, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 14.475164413452148, + "learning_rate": 1.892857142857143e-05, + "loss": 2.3317, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 16.231687545776367, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.5064, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 16.8968563079834, + "learning_rate": 1.88265306122449e-05, + "loss": 2.3932, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 17.74305534362793, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.3329, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 16.41620445251465, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.3982, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 17.965959548950195, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.4227, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 19.92589569091797, + "learning_rate": 1.862244897959184e-05, + "loss": 2.5255, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 20.62932586669922, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.1816, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 18.360614776611328, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.2827, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 19.199546813964844, + "learning_rate": 1.8469387755102043e-05, + "loss": 2.1498, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 22.727521896362305, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.3811, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 19.80649757385254, + "learning_rate": 1.836734693877551e-05, + "loss": 2.2342, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.24563217163086, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.2287, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 25.384042739868164, + "learning_rate": 1.826530612244898e-05, + "loss": 2.1259, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 23.417089462280273, + "learning_rate": 1.8214285714285715e-05, + "loss": 2.0858, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 27.639497756958008, + "learning_rate": 1.816326530612245e-05, + "loss": 2.2243, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 27.390850067138672, + "learning_rate": 1.8112244897959187e-05, + "loss": 2.1314, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 27.956937789916992, + "learning_rate": 1.806122448979592e-05, + "loss": 2.1755, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 32.09632873535156, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.2365, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 33.84647750854492, + "learning_rate": 1.795918367346939e-05, + "loss": 2.1671, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 32.027130126953125, + "learning_rate": 1.7908163265306123e-05, + "loss": 2.09, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 35.423587799072266, + "learning_rate": 1.785714285714286e-05, + "loss": 2.2479, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 31.041240692138672, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9687, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 28.790103912353516, + "learning_rate": 1.7755102040816327e-05, + "loss": 2.1428, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.089313507080078, + "learning_rate": 1.7704081632653062e-05, + "loss": 2.0673, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 26.493867874145508, + "learning_rate": 1.7653061224489798e-05, + "loss": 2.0814, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 19.993173599243164, + "learning_rate": 1.760204081632653e-05, + "loss": 2.005, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 21.89765167236328, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.2262, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 23.22844123840332, + "learning_rate": 1.7500000000000002e-05, + "loss": 2.0208, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 15.864526748657227, + "learning_rate": 1.7448979591836738e-05, + "loss": 2.0153, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 21.451187133789062, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.1386, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 18.089811325073242, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.9517, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 24.029157638549805, + "learning_rate": 1.729591836734694e-05, + "loss": 1.9719, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 18.722776412963867, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0623, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 20.211933135986328, + "learning_rate": 1.719387755102041e-05, + "loss": 2.0081, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 17.61188507080078, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.8484, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 20.118955612182617, + "learning_rate": 1.7091836734693878e-05, + "loss": 2.0799, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 17.271841049194336, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.9832, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 19.521392822265625, + "learning_rate": 1.698979591836735e-05, + "loss": 1.9129, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 22.660900115966797, + "learning_rate": 1.6938775510204085e-05, + "loss": 2.118, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 17.332427978515625, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.9632, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 22.42765998840332, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.954, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 23.6208553314209, + "learning_rate": 1.678571428571429e-05, + "loss": 1.9917, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 19.78505516052246, + "learning_rate": 1.673469387755102e-05, + "loss": 1.7964, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 19.453041076660156, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.9587, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 24.731407165527344, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.9945, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 20.977611541748047, + "learning_rate": 1.6581632653061225e-05, + "loss": 2.0617, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 22.959585189819336, + "learning_rate": 1.653061224489796e-05, + "loss": 1.98, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 21.952653884887695, + "learning_rate": 1.6479591836734696e-05, + "loss": 2.1094, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 22.320383071899414, + "learning_rate": 1.642857142857143e-05, + "loss": 1.8418, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 24.375411987304688, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.8428, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 19.64323616027832, + "learning_rate": 1.63265306122449e-05, + "loss": 1.9194, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 22.459064483642578, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.6649, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 36.789764404296875, + "learning_rate": 1.6224489795918368e-05, + "loss": 2.0131, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 22.109119415283203, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.9603, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 19.196834564208984, + "learning_rate": 1.612244897959184e-05, + "loss": 2.0538, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 26.870800018310547, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.9168, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 35.190696716308594, + "learning_rate": 1.6020408163265308e-05, + "loss": 2.0149, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 19.963472366333008, + "learning_rate": 1.596938775510204e-05, + "loss": 1.7871, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 20.292407989501953, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.944, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 20.55329132080078, + "learning_rate": 1.586734693877551e-05, + "loss": 2.0175, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 17.27350616455078, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.9308, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 22.471134185791016, + "learning_rate": 1.576530612244898e-05, + "loss": 1.9221, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 25.098316192626953, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.9359, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 25.125213623046875, + "learning_rate": 1.566326530612245e-05, + "loss": 2.0087, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 20.038599014282227, + "learning_rate": 1.5612244897959187e-05, + "loss": 2.0939, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 19.016841888427734, + "learning_rate": 1.556122448979592e-05, + "loss": 2.0183, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 21.97820472717285, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.8239, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 25.578901290893555, + "learning_rate": 1.545918367346939e-05, + "loss": 1.9388, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 23.74614143371582, + "learning_rate": 1.5408163265306123e-05, + "loss": 2.0492, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 22.203304290771484, + "learning_rate": 1.535714285714286e-05, + "loss": 1.941, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 21.39324188232422, + "learning_rate": 1.530612244897959e-05, + "loss": 1.9042, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 18.99315643310547, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.88, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 24.22341537475586, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.8147, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.8966256380081177, + "eval_runtime": 6.9787, + "eval_samples_per_second": 101.022, + "eval_steps_per_second": 50.583, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 18.296152114868164, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.8605, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 26.404766082763672, + "learning_rate": 1.510204081632653e-05, + "loss": 2.1195, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 19.187122344970703, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.9284, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 20.79934310913086, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.725, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 23.833288192749023, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.9215, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 22.301727294921875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.8728, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 23.685596466064453, + "learning_rate": 1.4846938775510204e-05, + "loss": 2.0482, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 18.969186782836914, + "learning_rate": 1.479591836734694e-05, + "loss": 1.8724, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 23.994483947753906, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.9542, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 16.84621238708496, + "learning_rate": 1.469387755102041e-05, + "loss": 1.9703, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 23.411087036132812, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.9836, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 29.55487632751465, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.9124, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 32.28921127319336, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.8566, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 24.007558822631836, + "learning_rate": 1.448979591836735e-05, + "loss": 1.8296, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 26.753524780273438, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.7181, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 22.49270248413086, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.8741, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 28.006656646728516, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.9151, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 17.606775283813477, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.9654, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 29.94802474975586, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.8849, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 28.27743148803711, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.8006, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 19.11652183532715, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.8539, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 24.255807876586914, + "learning_rate": 1.4081632653061225e-05, + "loss": 2.0162, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 22.508352279663086, + "learning_rate": 1.403061224489796e-05, + "loss": 1.858, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 27.028772354125977, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.8175, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 22.697704315185547, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.9789, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 31.604068756103516, + "learning_rate": 1.3877551020408165e-05, + "loss": 2.0039, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 27.71053695678711, + "learning_rate": 1.38265306122449e-05, + "loss": 1.9867, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 17.37586784362793, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.6931, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 20.28536605834961, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.9199, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 29.4377384185791, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.9322, + "step": 130 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1888137417277440.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-130/training_args.bin b/checkpoints/checkpoint-130/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..866f8d7e39db21daf30a11de7c14a3ccb8f2e9aa --- /dev/null +++ b/checkpoints/checkpoint-130/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:591380c2089627b1aa728bfd52abdb50afeaafc7a7f48353dede4e443fc370d0 +size 5969 diff --git a/checkpoints/checkpoint-140/README.md b/checkpoints/checkpoint-140/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b974a935220f493e52a3da346206a652479b4735 --- /dev/null +++ b/checkpoints/checkpoint-140/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.2-1B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-140/adapter_config.json b/checkpoints/checkpoint-140/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a0d31afa2216e2d6d2237fc15eef0c8c03ca674 --- /dev/null +++ b/checkpoints/checkpoint-140/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.2-1B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-140/adapter_model.safetensors b/checkpoints/checkpoint-140/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..07a4e978a5cace4b61eec9a8f31bceb3f5b2e26a --- /dev/null +++ b/checkpoints/checkpoint-140/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d700d17d0a06bdea84b573aed30aab957b80949cf216527740df49dad69fdcdd +size 41248 diff --git a/checkpoints/checkpoint-140/chat_template.jinja b/checkpoints/checkpoint-140/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..1bad6a0f648dccdbec523ca79ba90fbcfc806af0 --- /dev/null +++ b/checkpoints/checkpoint-140/chat_template.jinja @@ -0,0 +1,93 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- if strftime_now is defined %} + {%- set date_string = strftime_now("%d %b %Y") %} + {%- else %} + {%- set date_string = "26 Jul 2024" %} + {%- endif %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {{- "<|eot_id|>" }} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-140/optimizer.pt b/checkpoints/checkpoint-140/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c8927056a16b2ef524781773cb92ab02f8c94788 --- /dev/null +++ b/checkpoints/checkpoint-140/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55d02f3b40849873a75849c33301dcdbea84c5d247f40be731320a064fe92fcd +size 38889 diff --git a/checkpoints/checkpoint-140/rng_state.pth b/checkpoints/checkpoint-140/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..85d681a34283ea87d8ee31827a25caa7feffbf71 --- /dev/null +++ b/checkpoints/checkpoint-140/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cde881b24af86d8870de73e5328bf8754c9e262c58bcd875cf451eca0e6671d5 +size 14581 diff --git a/checkpoints/checkpoint-140/scheduler.pt b/checkpoints/checkpoint-140/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..cc41d46f496dd65e5dd0ad537ae72d20c6b1c24b --- /dev/null +++ b/checkpoints/checkpoint-140/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b1218e825b22bccb6f4fc5d47b9f4bc025a0fa0b82be351c5c0dde4212e1da3 +size 1465 diff --git a/checkpoints/checkpoint-140/special_tokens_map.json b/checkpoints/checkpoint-140/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-140/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-140/tokenizer.json b/checkpoints/checkpoint-140/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-140/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-140/tokenizer_config.json b/checkpoints/checkpoint-140/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-140/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-140/trainer_state.json b/checkpoints/checkpoint-140/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a39203d400ec18f8975b10bd414d8f0b77459912 --- /dev/null +++ b/checkpoints/checkpoint-140/trainer_state.json @@ -0,0 +1,1022 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.3530895334174023, + "eval_steps": 100, + "global_step": 140, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 25.350475311279297, + "learning_rate": 0.0, + "loss": 2.5568, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 24.538068771362305, + "learning_rate": 4.000000000000001e-06, + "loss": 2.7748, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 23.780784606933594, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5911, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 24.780380249023438, + "learning_rate": 1.2e-05, + "loss": 2.8427, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 22.699949264526367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.709, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 22.106008529663086, + "learning_rate": 2e-05, + "loss": 2.5854, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 22.497045516967773, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.5427, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 27.103275299072266, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.6599, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 21.081985473632812, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.5145, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 25.964981079101562, + "learning_rate": 1.979591836734694e-05, + "loss": 2.4247, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 25.353195190429688, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.5092, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 18.94191551208496, + "learning_rate": 1.969387755102041e-05, + "loss": 2.4335, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 23.60140037536621, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.544, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 24.298965454101562, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.4987, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.745506286621094, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.4985, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 22.54330062866211, + "learning_rate": 1.948979591836735e-05, + "loss": 2.6892, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 21.46229362487793, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.3998, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 20.54530143737793, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.4244, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 18.8839111328125, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.3911, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.924652099609375, + "learning_rate": 1.928571428571429e-05, + "loss": 2.3588, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.996627807617188, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.3727, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 18.584613800048828, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2974, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 14.309200286865234, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.4843, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.074164390563965, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.4043, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 13.610542297363281, + "learning_rate": 1.903061224489796e-05, + "loss": 2.3762, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 15.666613578796387, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.3249, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 14.475164413452148, + "learning_rate": 1.892857142857143e-05, + "loss": 2.3317, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 16.231687545776367, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.5064, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 16.8968563079834, + "learning_rate": 1.88265306122449e-05, + "loss": 2.3932, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 17.74305534362793, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.3329, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 16.41620445251465, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.3982, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 17.965959548950195, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.4227, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 19.92589569091797, + "learning_rate": 1.862244897959184e-05, + "loss": 2.5255, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 20.62932586669922, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.1816, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 18.360614776611328, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.2827, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 19.199546813964844, + "learning_rate": 1.8469387755102043e-05, + "loss": 2.1498, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 22.727521896362305, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.3811, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 19.80649757385254, + "learning_rate": 1.836734693877551e-05, + "loss": 2.2342, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.24563217163086, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.2287, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 25.384042739868164, + "learning_rate": 1.826530612244898e-05, + "loss": 2.1259, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 23.417089462280273, + "learning_rate": 1.8214285714285715e-05, + "loss": 2.0858, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 27.639497756958008, + "learning_rate": 1.816326530612245e-05, + "loss": 2.2243, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 27.390850067138672, + "learning_rate": 1.8112244897959187e-05, + "loss": 2.1314, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 27.956937789916992, + "learning_rate": 1.806122448979592e-05, + "loss": 2.1755, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 32.09632873535156, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.2365, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 33.84647750854492, + "learning_rate": 1.795918367346939e-05, + "loss": 2.1671, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 32.027130126953125, + "learning_rate": 1.7908163265306123e-05, + "loss": 2.09, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 35.423587799072266, + "learning_rate": 1.785714285714286e-05, + "loss": 2.2479, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 31.041240692138672, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9687, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 28.790103912353516, + "learning_rate": 1.7755102040816327e-05, + "loss": 2.1428, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.089313507080078, + "learning_rate": 1.7704081632653062e-05, + "loss": 2.0673, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 26.493867874145508, + "learning_rate": 1.7653061224489798e-05, + "loss": 2.0814, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 19.993173599243164, + "learning_rate": 1.760204081632653e-05, + "loss": 2.005, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 21.89765167236328, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.2262, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 23.22844123840332, + "learning_rate": 1.7500000000000002e-05, + "loss": 2.0208, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 15.864526748657227, + "learning_rate": 1.7448979591836738e-05, + "loss": 2.0153, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 21.451187133789062, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.1386, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 18.089811325073242, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.9517, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 24.029157638549805, + "learning_rate": 1.729591836734694e-05, + "loss": 1.9719, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 18.722776412963867, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0623, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 20.211933135986328, + "learning_rate": 1.719387755102041e-05, + "loss": 2.0081, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 17.61188507080078, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.8484, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 20.118955612182617, + "learning_rate": 1.7091836734693878e-05, + "loss": 2.0799, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 17.271841049194336, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.9832, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 19.521392822265625, + "learning_rate": 1.698979591836735e-05, + "loss": 1.9129, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 22.660900115966797, + "learning_rate": 1.6938775510204085e-05, + "loss": 2.118, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 17.332427978515625, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.9632, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 22.42765998840332, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.954, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 23.6208553314209, + "learning_rate": 1.678571428571429e-05, + "loss": 1.9917, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 19.78505516052246, + "learning_rate": 1.673469387755102e-05, + "loss": 1.7964, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 19.453041076660156, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.9587, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 24.731407165527344, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.9945, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 20.977611541748047, + "learning_rate": 1.6581632653061225e-05, + "loss": 2.0617, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 22.959585189819336, + "learning_rate": 1.653061224489796e-05, + "loss": 1.98, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 21.952653884887695, + "learning_rate": 1.6479591836734696e-05, + "loss": 2.1094, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 22.320383071899414, + "learning_rate": 1.642857142857143e-05, + "loss": 1.8418, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 24.375411987304688, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.8428, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 19.64323616027832, + "learning_rate": 1.63265306122449e-05, + "loss": 1.9194, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 22.459064483642578, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.6649, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 36.789764404296875, + "learning_rate": 1.6224489795918368e-05, + "loss": 2.0131, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 22.109119415283203, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.9603, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 19.196834564208984, + "learning_rate": 1.612244897959184e-05, + "loss": 2.0538, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 26.870800018310547, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.9168, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 35.190696716308594, + "learning_rate": 1.6020408163265308e-05, + "loss": 2.0149, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 19.963472366333008, + "learning_rate": 1.596938775510204e-05, + "loss": 1.7871, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 20.292407989501953, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.944, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 20.55329132080078, + "learning_rate": 1.586734693877551e-05, + "loss": 2.0175, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 17.27350616455078, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.9308, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 22.471134185791016, + "learning_rate": 1.576530612244898e-05, + "loss": 1.9221, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 25.098316192626953, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.9359, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 25.125213623046875, + "learning_rate": 1.566326530612245e-05, + "loss": 2.0087, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 20.038599014282227, + "learning_rate": 1.5612244897959187e-05, + "loss": 2.0939, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 19.016841888427734, + "learning_rate": 1.556122448979592e-05, + "loss": 2.0183, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 21.97820472717285, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.8239, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 25.578901290893555, + "learning_rate": 1.545918367346939e-05, + "loss": 1.9388, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 23.74614143371582, + "learning_rate": 1.5408163265306123e-05, + "loss": 2.0492, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 22.203304290771484, + "learning_rate": 1.535714285714286e-05, + "loss": 1.941, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 21.39324188232422, + "learning_rate": 1.530612244897959e-05, + "loss": 1.9042, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 18.99315643310547, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.88, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 24.22341537475586, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.8147, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.8966256380081177, + "eval_runtime": 6.9787, + "eval_samples_per_second": 101.022, + "eval_steps_per_second": 50.583, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 18.296152114868164, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.8605, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 26.404766082763672, + "learning_rate": 1.510204081632653e-05, + "loss": 2.1195, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 19.187122344970703, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.9284, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 20.79934310913086, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.725, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 23.833288192749023, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.9215, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 22.301727294921875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.8728, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 23.685596466064453, + "learning_rate": 1.4846938775510204e-05, + "loss": 2.0482, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 18.969186782836914, + "learning_rate": 1.479591836734694e-05, + "loss": 1.8724, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 23.994483947753906, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.9542, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 16.84621238708496, + "learning_rate": 1.469387755102041e-05, + "loss": 1.9703, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 23.411087036132812, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.9836, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 29.55487632751465, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.9124, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 32.28921127319336, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.8566, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 24.007558822631836, + "learning_rate": 1.448979591836735e-05, + "loss": 1.8296, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 26.753524780273438, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.7181, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 22.49270248413086, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.8741, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 28.006656646728516, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.9151, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 17.606775283813477, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.9654, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 29.94802474975586, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.8849, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 28.27743148803711, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.8006, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 19.11652183532715, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.8539, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 24.255807876586914, + "learning_rate": 1.4081632653061225e-05, + "loss": 2.0162, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 22.508352279663086, + "learning_rate": 1.403061224489796e-05, + "loss": 1.858, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 27.028772354125977, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.8175, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 22.697704315185547, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.9789, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 31.604068756103516, + "learning_rate": 1.3877551020408165e-05, + "loss": 2.0039, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 27.71053695678711, + "learning_rate": 1.38265306122449e-05, + "loss": 1.9867, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 17.37586784362793, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.6931, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 20.28536605834961, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.9199, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 29.4377384185791, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.9322, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 17.703046798706055, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.8842, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 30.14008140563965, + "learning_rate": 1.3571428571428574e-05, + "loss": 2.1228, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 29.657262802124023, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.8929, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 18.243854522705078, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.8811, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 33.22247314453125, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.9814, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 26.413856506347656, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.9329, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 20.56089210510254, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.8944, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 19.480737686157227, + "learning_rate": 1.326530612244898e-05, + "loss": 1.9221, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 22.788074493408203, + "learning_rate": 1.3214285714285716e-05, + "loss": 2.0445, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 20.722291946411133, + "learning_rate": 1.316326530612245e-05, + "loss": 1.9998, + "step": 140 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2035932618768384.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-140/training_args.bin b/checkpoints/checkpoint-140/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..866f8d7e39db21daf30a11de7c14a3ccb8f2e9aa --- /dev/null +++ b/checkpoints/checkpoint-140/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:591380c2089627b1aa728bfd52abdb50afeaafc7a7f48353dede4e443fc370d0 +size 5969 diff --git a/checkpoints/checkpoint-150/README.md b/checkpoints/checkpoint-150/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b974a935220f493e52a3da346206a652479b4735 --- /dev/null +++ b/checkpoints/checkpoint-150/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.2-1B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-150/adapter_config.json b/checkpoints/checkpoint-150/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a0d31afa2216e2d6d2237fc15eef0c8c03ca674 --- /dev/null +++ b/checkpoints/checkpoint-150/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.2-1B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-150/adapter_model.safetensors b/checkpoints/checkpoint-150/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7b9271e8156d21e9853dd112a69c59381924078c --- /dev/null +++ b/checkpoints/checkpoint-150/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c1e609291545d719eff5d73d8b88399adcd9a4d75d57d09a0499d6c2889b93d +size 41248 diff --git a/checkpoints/checkpoint-150/chat_template.jinja b/checkpoints/checkpoint-150/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..1bad6a0f648dccdbec523ca79ba90fbcfc806af0 --- /dev/null +++ b/checkpoints/checkpoint-150/chat_template.jinja @@ -0,0 +1,93 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- if strftime_now is defined %} + {%- set date_string = strftime_now("%d %b %Y") %} + {%- else %} + {%- set date_string = "26 Jul 2024" %} + {%- endif %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {{- "<|eot_id|>" }} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-150/optimizer.pt b/checkpoints/checkpoint-150/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..33fc0fd4126677459d8a1a559ec901b20eb41c81 --- /dev/null +++ b/checkpoints/checkpoint-150/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a18f6859797fbda7cf41ed705cc6d892350a8d60081e0f3bb5e1c41bb8cb789 +size 38889 diff --git a/checkpoints/checkpoint-150/rng_state.pth b/checkpoints/checkpoint-150/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..85d681a34283ea87d8ee31827a25caa7feffbf71 --- /dev/null +++ b/checkpoints/checkpoint-150/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cde881b24af86d8870de73e5328bf8754c9e262c58bcd875cf451eca0e6671d5 +size 14581 diff --git a/checkpoints/checkpoint-150/scheduler.pt b/checkpoints/checkpoint-150/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ac6e4fdc9db1809ce0fc85d3274486396f84b711 --- /dev/null +++ b/checkpoints/checkpoint-150/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1577eec1a6e0dcadc696543d7c691e8c0d5ad6de5714ef1d566b91186063677 +size 1465 diff --git a/checkpoints/checkpoint-150/special_tokens_map.json b/checkpoints/checkpoint-150/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-150/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-150/tokenizer.json b/checkpoints/checkpoint-150/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-150/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-150/tokenizer_config.json b/checkpoints/checkpoint-150/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-150/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-150/trainer_state.json b/checkpoints/checkpoint-150/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a9d313bd7924b1e8f0dd3bcd2277ca18ea5b9987 --- /dev/null +++ b/checkpoints/checkpoint-150/trainer_state.json @@ -0,0 +1,1092 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.37831021437578816, + "eval_steps": 100, + "global_step": 150, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 25.350475311279297, + "learning_rate": 0.0, + "loss": 2.5568, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 24.538068771362305, + "learning_rate": 4.000000000000001e-06, + "loss": 2.7748, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 23.780784606933594, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5911, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 24.780380249023438, + "learning_rate": 1.2e-05, + "loss": 2.8427, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 22.699949264526367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.709, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 22.106008529663086, + "learning_rate": 2e-05, + "loss": 2.5854, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 22.497045516967773, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.5427, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 27.103275299072266, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.6599, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 21.081985473632812, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.5145, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 25.964981079101562, + "learning_rate": 1.979591836734694e-05, + "loss": 2.4247, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 25.353195190429688, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.5092, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 18.94191551208496, + "learning_rate": 1.969387755102041e-05, + "loss": 2.4335, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 23.60140037536621, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.544, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 24.298965454101562, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.4987, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.745506286621094, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.4985, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 22.54330062866211, + "learning_rate": 1.948979591836735e-05, + "loss": 2.6892, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 21.46229362487793, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.3998, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 20.54530143737793, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.4244, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 18.8839111328125, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.3911, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.924652099609375, + "learning_rate": 1.928571428571429e-05, + "loss": 2.3588, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.996627807617188, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.3727, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 18.584613800048828, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2974, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 14.309200286865234, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.4843, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.074164390563965, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.4043, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 13.610542297363281, + "learning_rate": 1.903061224489796e-05, + "loss": 2.3762, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 15.666613578796387, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.3249, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 14.475164413452148, + "learning_rate": 1.892857142857143e-05, + "loss": 2.3317, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 16.231687545776367, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.5064, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 16.8968563079834, + "learning_rate": 1.88265306122449e-05, + "loss": 2.3932, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 17.74305534362793, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.3329, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 16.41620445251465, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.3982, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 17.965959548950195, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.4227, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 19.92589569091797, + "learning_rate": 1.862244897959184e-05, + "loss": 2.5255, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 20.62932586669922, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.1816, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 18.360614776611328, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.2827, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 19.199546813964844, + "learning_rate": 1.8469387755102043e-05, + "loss": 2.1498, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 22.727521896362305, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.3811, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 19.80649757385254, + "learning_rate": 1.836734693877551e-05, + "loss": 2.2342, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.24563217163086, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.2287, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 25.384042739868164, + "learning_rate": 1.826530612244898e-05, + "loss": 2.1259, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 23.417089462280273, + "learning_rate": 1.8214285714285715e-05, + "loss": 2.0858, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 27.639497756958008, + "learning_rate": 1.816326530612245e-05, + "loss": 2.2243, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 27.390850067138672, + "learning_rate": 1.8112244897959187e-05, + "loss": 2.1314, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 27.956937789916992, + "learning_rate": 1.806122448979592e-05, + "loss": 2.1755, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 32.09632873535156, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.2365, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 33.84647750854492, + "learning_rate": 1.795918367346939e-05, + "loss": 2.1671, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 32.027130126953125, + "learning_rate": 1.7908163265306123e-05, + "loss": 2.09, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 35.423587799072266, + "learning_rate": 1.785714285714286e-05, + "loss": 2.2479, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 31.041240692138672, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9687, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 28.790103912353516, + "learning_rate": 1.7755102040816327e-05, + "loss": 2.1428, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.089313507080078, + "learning_rate": 1.7704081632653062e-05, + "loss": 2.0673, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 26.493867874145508, + "learning_rate": 1.7653061224489798e-05, + "loss": 2.0814, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 19.993173599243164, + "learning_rate": 1.760204081632653e-05, + "loss": 2.005, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 21.89765167236328, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.2262, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 23.22844123840332, + "learning_rate": 1.7500000000000002e-05, + "loss": 2.0208, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 15.864526748657227, + "learning_rate": 1.7448979591836738e-05, + "loss": 2.0153, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 21.451187133789062, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.1386, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 18.089811325073242, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.9517, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 24.029157638549805, + "learning_rate": 1.729591836734694e-05, + "loss": 1.9719, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 18.722776412963867, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0623, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 20.211933135986328, + "learning_rate": 1.719387755102041e-05, + "loss": 2.0081, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 17.61188507080078, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.8484, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 20.118955612182617, + "learning_rate": 1.7091836734693878e-05, + "loss": 2.0799, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 17.271841049194336, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.9832, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 19.521392822265625, + "learning_rate": 1.698979591836735e-05, + "loss": 1.9129, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 22.660900115966797, + "learning_rate": 1.6938775510204085e-05, + "loss": 2.118, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 17.332427978515625, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.9632, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 22.42765998840332, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.954, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 23.6208553314209, + "learning_rate": 1.678571428571429e-05, + "loss": 1.9917, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 19.78505516052246, + "learning_rate": 1.673469387755102e-05, + "loss": 1.7964, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 19.453041076660156, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.9587, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 24.731407165527344, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.9945, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 20.977611541748047, + "learning_rate": 1.6581632653061225e-05, + "loss": 2.0617, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 22.959585189819336, + "learning_rate": 1.653061224489796e-05, + "loss": 1.98, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 21.952653884887695, + "learning_rate": 1.6479591836734696e-05, + "loss": 2.1094, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 22.320383071899414, + "learning_rate": 1.642857142857143e-05, + "loss": 1.8418, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 24.375411987304688, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.8428, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 19.64323616027832, + "learning_rate": 1.63265306122449e-05, + "loss": 1.9194, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 22.459064483642578, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.6649, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 36.789764404296875, + "learning_rate": 1.6224489795918368e-05, + "loss": 2.0131, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 22.109119415283203, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.9603, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 19.196834564208984, + "learning_rate": 1.612244897959184e-05, + "loss": 2.0538, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 26.870800018310547, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.9168, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 35.190696716308594, + "learning_rate": 1.6020408163265308e-05, + "loss": 2.0149, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 19.963472366333008, + "learning_rate": 1.596938775510204e-05, + "loss": 1.7871, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 20.292407989501953, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.944, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 20.55329132080078, + "learning_rate": 1.586734693877551e-05, + "loss": 2.0175, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 17.27350616455078, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.9308, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 22.471134185791016, + "learning_rate": 1.576530612244898e-05, + "loss": 1.9221, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 25.098316192626953, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.9359, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 25.125213623046875, + "learning_rate": 1.566326530612245e-05, + "loss": 2.0087, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 20.038599014282227, + "learning_rate": 1.5612244897959187e-05, + "loss": 2.0939, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 19.016841888427734, + "learning_rate": 1.556122448979592e-05, + "loss": 2.0183, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 21.97820472717285, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.8239, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 25.578901290893555, + "learning_rate": 1.545918367346939e-05, + "loss": 1.9388, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 23.74614143371582, + "learning_rate": 1.5408163265306123e-05, + "loss": 2.0492, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 22.203304290771484, + "learning_rate": 1.535714285714286e-05, + "loss": 1.941, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 21.39324188232422, + "learning_rate": 1.530612244897959e-05, + "loss": 1.9042, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 18.99315643310547, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.88, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 24.22341537475586, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.8147, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.8966256380081177, + "eval_runtime": 6.9787, + "eval_samples_per_second": 101.022, + "eval_steps_per_second": 50.583, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 18.296152114868164, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.8605, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 26.404766082763672, + "learning_rate": 1.510204081632653e-05, + "loss": 2.1195, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 19.187122344970703, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.9284, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 20.79934310913086, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.725, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 23.833288192749023, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.9215, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 22.301727294921875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.8728, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 23.685596466064453, + "learning_rate": 1.4846938775510204e-05, + "loss": 2.0482, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 18.969186782836914, + "learning_rate": 1.479591836734694e-05, + "loss": 1.8724, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 23.994483947753906, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.9542, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 16.84621238708496, + "learning_rate": 1.469387755102041e-05, + "loss": 1.9703, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 23.411087036132812, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.9836, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 29.55487632751465, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.9124, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 32.28921127319336, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.8566, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 24.007558822631836, + "learning_rate": 1.448979591836735e-05, + "loss": 1.8296, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 26.753524780273438, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.7181, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 22.49270248413086, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.8741, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 28.006656646728516, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.9151, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 17.606775283813477, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.9654, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 29.94802474975586, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.8849, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 28.27743148803711, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.8006, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 19.11652183532715, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.8539, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 24.255807876586914, + "learning_rate": 1.4081632653061225e-05, + "loss": 2.0162, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 22.508352279663086, + "learning_rate": 1.403061224489796e-05, + "loss": 1.858, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 27.028772354125977, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.8175, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 22.697704315185547, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.9789, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 31.604068756103516, + "learning_rate": 1.3877551020408165e-05, + "loss": 2.0039, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 27.71053695678711, + "learning_rate": 1.38265306122449e-05, + "loss": 1.9867, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 17.37586784362793, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.6931, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 20.28536605834961, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.9199, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 29.4377384185791, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.9322, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 17.703046798706055, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.8842, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 30.14008140563965, + "learning_rate": 1.3571428571428574e-05, + "loss": 2.1228, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 29.657262802124023, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.8929, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 18.243854522705078, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.8811, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 33.22247314453125, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.9814, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 26.413856506347656, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.9329, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 20.56089210510254, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.8944, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 19.480737686157227, + "learning_rate": 1.326530612244898e-05, + "loss": 1.9221, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 22.788074493408203, + "learning_rate": 1.3214285714285716e-05, + "loss": 2.0445, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 20.722291946411133, + "learning_rate": 1.316326530612245e-05, + "loss": 1.9998, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 25.190189361572266, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.8076, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 23.203886032104492, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.7821, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 25.32374382019043, + "learning_rate": 1.3010204081632653e-05, + "loss": 2.0356, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 28.798864364624023, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.8202, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 24.93810272216797, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.9237, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 36.78353500366211, + "learning_rate": 1.2857142857142859e-05, + "loss": 2.0019, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 28.510663986206055, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.9268, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 38.19087219238281, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.9366, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 20.796728134155273, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.8731, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 23.036758422851562, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.8835, + "step": 150 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2181777614929920.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-150/training_args.bin b/checkpoints/checkpoint-150/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..866f8d7e39db21daf30a11de7c14a3ccb8f2e9aa --- /dev/null +++ b/checkpoints/checkpoint-150/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:591380c2089627b1aa728bfd52abdb50afeaafc7a7f48353dede4e443fc370d0 +size 5969 diff --git a/checkpoints/checkpoint-160/README.md b/checkpoints/checkpoint-160/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b974a935220f493e52a3da346206a652479b4735 --- /dev/null +++ b/checkpoints/checkpoint-160/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.2-1B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-160/adapter_config.json b/checkpoints/checkpoint-160/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a0d31afa2216e2d6d2237fc15eef0c8c03ca674 --- /dev/null +++ b/checkpoints/checkpoint-160/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.2-1B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-160/adapter_model.safetensors b/checkpoints/checkpoint-160/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7b665c9282055b9adb6ba52564c367be10d4c888 --- /dev/null +++ b/checkpoints/checkpoint-160/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe7507ab7d2ba79e094dae603aefe6a137a6631942fff401c5073400ade4aedd +size 41248 diff --git a/checkpoints/checkpoint-160/chat_template.jinja b/checkpoints/checkpoint-160/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..1bad6a0f648dccdbec523ca79ba90fbcfc806af0 --- /dev/null +++ b/checkpoints/checkpoint-160/chat_template.jinja @@ -0,0 +1,93 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- if strftime_now is defined %} + {%- set date_string = strftime_now("%d %b %Y") %} + {%- else %} + {%- set date_string = "26 Jul 2024" %} + {%- endif %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {{- "<|eot_id|>" }} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-160/optimizer.pt b/checkpoints/checkpoint-160/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..655a2ed941456de6ac5b7bf799620e8df9533210 --- /dev/null +++ b/checkpoints/checkpoint-160/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcf6ac2a8de2a4d4ac382e09c79b99dde2b159be0e3aa02ba2f55c2ef559cff7 +size 38889 diff --git a/checkpoints/checkpoint-160/rng_state.pth b/checkpoints/checkpoint-160/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..85d681a34283ea87d8ee31827a25caa7feffbf71 --- /dev/null +++ b/checkpoints/checkpoint-160/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cde881b24af86d8870de73e5328bf8754c9e262c58bcd875cf451eca0e6671d5 +size 14581 diff --git a/checkpoints/checkpoint-160/scheduler.pt b/checkpoints/checkpoint-160/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..11d4c33a69ab2b72bb7ff8886933dc4ddcd195c5 --- /dev/null +++ b/checkpoints/checkpoint-160/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86b6652fb5b5dff04fe4127b8c325c611b1eb8148abfc3663612a4e09ca8299d +size 1465 diff --git a/checkpoints/checkpoint-160/special_tokens_map.json b/checkpoints/checkpoint-160/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-160/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-160/tokenizer.json b/checkpoints/checkpoint-160/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-160/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-160/tokenizer_config.json b/checkpoints/checkpoint-160/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-160/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-160/trainer_state.json b/checkpoints/checkpoint-160/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c455fd2fd345c00899b5b97e116fddc0770d435d --- /dev/null +++ b/checkpoints/checkpoint-160/trainer_state.json @@ -0,0 +1,1162 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.403530895334174, + "eval_steps": 100, + "global_step": 160, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 25.350475311279297, + "learning_rate": 0.0, + "loss": 2.5568, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 24.538068771362305, + "learning_rate": 4.000000000000001e-06, + "loss": 2.7748, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 23.780784606933594, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5911, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 24.780380249023438, + "learning_rate": 1.2e-05, + "loss": 2.8427, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 22.699949264526367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.709, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 22.106008529663086, + "learning_rate": 2e-05, + "loss": 2.5854, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 22.497045516967773, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.5427, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 27.103275299072266, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.6599, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 21.081985473632812, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.5145, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 25.964981079101562, + "learning_rate": 1.979591836734694e-05, + "loss": 2.4247, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 25.353195190429688, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.5092, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 18.94191551208496, + "learning_rate": 1.969387755102041e-05, + "loss": 2.4335, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 23.60140037536621, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.544, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 24.298965454101562, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.4987, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.745506286621094, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.4985, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 22.54330062866211, + "learning_rate": 1.948979591836735e-05, + "loss": 2.6892, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 21.46229362487793, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.3998, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 20.54530143737793, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.4244, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 18.8839111328125, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.3911, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.924652099609375, + "learning_rate": 1.928571428571429e-05, + "loss": 2.3588, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.996627807617188, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.3727, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 18.584613800048828, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2974, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 14.309200286865234, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.4843, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.074164390563965, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.4043, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 13.610542297363281, + "learning_rate": 1.903061224489796e-05, + "loss": 2.3762, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 15.666613578796387, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.3249, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 14.475164413452148, + "learning_rate": 1.892857142857143e-05, + "loss": 2.3317, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 16.231687545776367, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.5064, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 16.8968563079834, + "learning_rate": 1.88265306122449e-05, + "loss": 2.3932, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 17.74305534362793, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.3329, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 16.41620445251465, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.3982, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 17.965959548950195, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.4227, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 19.92589569091797, + "learning_rate": 1.862244897959184e-05, + "loss": 2.5255, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 20.62932586669922, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.1816, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 18.360614776611328, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.2827, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 19.199546813964844, + "learning_rate": 1.8469387755102043e-05, + "loss": 2.1498, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 22.727521896362305, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.3811, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 19.80649757385254, + "learning_rate": 1.836734693877551e-05, + "loss": 2.2342, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.24563217163086, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.2287, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 25.384042739868164, + "learning_rate": 1.826530612244898e-05, + "loss": 2.1259, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 23.417089462280273, + "learning_rate": 1.8214285714285715e-05, + "loss": 2.0858, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 27.639497756958008, + "learning_rate": 1.816326530612245e-05, + "loss": 2.2243, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 27.390850067138672, + "learning_rate": 1.8112244897959187e-05, + "loss": 2.1314, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 27.956937789916992, + "learning_rate": 1.806122448979592e-05, + "loss": 2.1755, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 32.09632873535156, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.2365, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 33.84647750854492, + "learning_rate": 1.795918367346939e-05, + "loss": 2.1671, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 32.027130126953125, + "learning_rate": 1.7908163265306123e-05, + "loss": 2.09, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 35.423587799072266, + "learning_rate": 1.785714285714286e-05, + "loss": 2.2479, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 31.041240692138672, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9687, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 28.790103912353516, + "learning_rate": 1.7755102040816327e-05, + "loss": 2.1428, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.089313507080078, + "learning_rate": 1.7704081632653062e-05, + "loss": 2.0673, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 26.493867874145508, + "learning_rate": 1.7653061224489798e-05, + "loss": 2.0814, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 19.993173599243164, + "learning_rate": 1.760204081632653e-05, + "loss": 2.005, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 21.89765167236328, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.2262, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 23.22844123840332, + "learning_rate": 1.7500000000000002e-05, + "loss": 2.0208, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 15.864526748657227, + "learning_rate": 1.7448979591836738e-05, + "loss": 2.0153, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 21.451187133789062, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.1386, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 18.089811325073242, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.9517, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 24.029157638549805, + "learning_rate": 1.729591836734694e-05, + "loss": 1.9719, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 18.722776412963867, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0623, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 20.211933135986328, + "learning_rate": 1.719387755102041e-05, + "loss": 2.0081, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 17.61188507080078, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.8484, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 20.118955612182617, + "learning_rate": 1.7091836734693878e-05, + "loss": 2.0799, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 17.271841049194336, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.9832, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 19.521392822265625, + "learning_rate": 1.698979591836735e-05, + "loss": 1.9129, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 22.660900115966797, + "learning_rate": 1.6938775510204085e-05, + "loss": 2.118, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 17.332427978515625, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.9632, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 22.42765998840332, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.954, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 23.6208553314209, + "learning_rate": 1.678571428571429e-05, + "loss": 1.9917, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 19.78505516052246, + "learning_rate": 1.673469387755102e-05, + "loss": 1.7964, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 19.453041076660156, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.9587, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 24.731407165527344, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.9945, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 20.977611541748047, + "learning_rate": 1.6581632653061225e-05, + "loss": 2.0617, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 22.959585189819336, + "learning_rate": 1.653061224489796e-05, + "loss": 1.98, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 21.952653884887695, + "learning_rate": 1.6479591836734696e-05, + "loss": 2.1094, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 22.320383071899414, + "learning_rate": 1.642857142857143e-05, + "loss": 1.8418, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 24.375411987304688, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.8428, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 19.64323616027832, + "learning_rate": 1.63265306122449e-05, + "loss": 1.9194, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 22.459064483642578, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.6649, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 36.789764404296875, + "learning_rate": 1.6224489795918368e-05, + "loss": 2.0131, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 22.109119415283203, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.9603, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 19.196834564208984, + "learning_rate": 1.612244897959184e-05, + "loss": 2.0538, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 26.870800018310547, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.9168, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 35.190696716308594, + "learning_rate": 1.6020408163265308e-05, + "loss": 2.0149, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 19.963472366333008, + "learning_rate": 1.596938775510204e-05, + "loss": 1.7871, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 20.292407989501953, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.944, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 20.55329132080078, + "learning_rate": 1.586734693877551e-05, + "loss": 2.0175, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 17.27350616455078, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.9308, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 22.471134185791016, + "learning_rate": 1.576530612244898e-05, + "loss": 1.9221, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 25.098316192626953, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.9359, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 25.125213623046875, + "learning_rate": 1.566326530612245e-05, + "loss": 2.0087, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 20.038599014282227, + "learning_rate": 1.5612244897959187e-05, + "loss": 2.0939, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 19.016841888427734, + "learning_rate": 1.556122448979592e-05, + "loss": 2.0183, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 21.97820472717285, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.8239, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 25.578901290893555, + "learning_rate": 1.545918367346939e-05, + "loss": 1.9388, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 23.74614143371582, + "learning_rate": 1.5408163265306123e-05, + "loss": 2.0492, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 22.203304290771484, + "learning_rate": 1.535714285714286e-05, + "loss": 1.941, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 21.39324188232422, + "learning_rate": 1.530612244897959e-05, + "loss": 1.9042, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 18.99315643310547, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.88, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 24.22341537475586, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.8147, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.8966256380081177, + "eval_runtime": 6.9787, + "eval_samples_per_second": 101.022, + "eval_steps_per_second": 50.583, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 18.296152114868164, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.8605, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 26.404766082763672, + "learning_rate": 1.510204081632653e-05, + "loss": 2.1195, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 19.187122344970703, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.9284, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 20.79934310913086, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.725, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 23.833288192749023, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.9215, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 22.301727294921875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.8728, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 23.685596466064453, + "learning_rate": 1.4846938775510204e-05, + "loss": 2.0482, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 18.969186782836914, + "learning_rate": 1.479591836734694e-05, + "loss": 1.8724, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 23.994483947753906, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.9542, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 16.84621238708496, + "learning_rate": 1.469387755102041e-05, + "loss": 1.9703, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 23.411087036132812, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.9836, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 29.55487632751465, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.9124, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 32.28921127319336, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.8566, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 24.007558822631836, + "learning_rate": 1.448979591836735e-05, + "loss": 1.8296, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 26.753524780273438, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.7181, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 22.49270248413086, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.8741, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 28.006656646728516, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.9151, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 17.606775283813477, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.9654, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 29.94802474975586, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.8849, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 28.27743148803711, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.8006, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 19.11652183532715, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.8539, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 24.255807876586914, + "learning_rate": 1.4081632653061225e-05, + "loss": 2.0162, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 22.508352279663086, + "learning_rate": 1.403061224489796e-05, + "loss": 1.858, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 27.028772354125977, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.8175, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 22.697704315185547, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.9789, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 31.604068756103516, + "learning_rate": 1.3877551020408165e-05, + "loss": 2.0039, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 27.71053695678711, + "learning_rate": 1.38265306122449e-05, + "loss": 1.9867, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 17.37586784362793, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.6931, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 20.28536605834961, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.9199, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 29.4377384185791, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.9322, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 17.703046798706055, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.8842, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 30.14008140563965, + "learning_rate": 1.3571428571428574e-05, + "loss": 2.1228, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 29.657262802124023, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.8929, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 18.243854522705078, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.8811, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 33.22247314453125, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.9814, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 26.413856506347656, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.9329, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 20.56089210510254, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.8944, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 19.480737686157227, + "learning_rate": 1.326530612244898e-05, + "loss": 1.9221, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 22.788074493408203, + "learning_rate": 1.3214285714285716e-05, + "loss": 2.0445, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 20.722291946411133, + "learning_rate": 1.316326530612245e-05, + "loss": 1.9998, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 25.190189361572266, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.8076, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 23.203886032104492, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.7821, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 25.32374382019043, + "learning_rate": 1.3010204081632653e-05, + "loss": 2.0356, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 28.798864364624023, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.8202, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 24.93810272216797, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.9237, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 36.78353500366211, + "learning_rate": 1.2857142857142859e-05, + "loss": 2.0019, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 28.510663986206055, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.9268, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 38.19087219238281, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.9366, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 20.796728134155273, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.8731, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 23.036758422851562, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.8835, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 27.058195114135742, + "learning_rate": 1.260204081632653e-05, + "loss": 1.8013, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 25.390460968017578, + "learning_rate": 1.2551020408163267e-05, + "loss": 2.0623, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 27.993654251098633, + "learning_rate": 1.25e-05, + "loss": 1.6895, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 24.15807342529297, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.9799, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 24.369815826416016, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.9687, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 24.572988510131836, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.8607, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 20.491390228271484, + "learning_rate": 1.229591836734694e-05, + "loss": 2.0677, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 25.128101348876953, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.9233, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 18.843276977539062, + "learning_rate": 1.219387755102041e-05, + "loss": 1.781, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 24.99994659423828, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.962, + "step": 160 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2330296844746752.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-160/training_args.bin b/checkpoints/checkpoint-160/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..866f8d7e39db21daf30a11de7c14a3ccb8f2e9aa --- /dev/null +++ b/checkpoints/checkpoint-160/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:591380c2089627b1aa728bfd52abdb50afeaafc7a7f48353dede4e443fc370d0 +size 5969 diff --git a/checkpoints/checkpoint-170/README.md b/checkpoints/checkpoint-170/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b974a935220f493e52a3da346206a652479b4735 --- /dev/null +++ b/checkpoints/checkpoint-170/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.2-1B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-170/adapter_config.json b/checkpoints/checkpoint-170/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a0d31afa2216e2d6d2237fc15eef0c8c03ca674 --- /dev/null +++ b/checkpoints/checkpoint-170/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.2-1B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-170/adapter_model.safetensors b/checkpoints/checkpoint-170/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8927b9076d5a7584d2a658cb556ee7e845556252 --- /dev/null +++ b/checkpoints/checkpoint-170/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a8fee06525c47bb7ef4951e16a53ef872a3b6a4fa1f6ff05ed18b6b8dfb0dd9 +size 41248 diff --git a/checkpoints/checkpoint-170/chat_template.jinja b/checkpoints/checkpoint-170/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..1bad6a0f648dccdbec523ca79ba90fbcfc806af0 --- /dev/null +++ b/checkpoints/checkpoint-170/chat_template.jinja @@ -0,0 +1,93 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- if strftime_now is defined %} + {%- set date_string = strftime_now("%d %b %Y") %} + {%- else %} + {%- set date_string = "26 Jul 2024" %} + {%- endif %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {{- "<|eot_id|>" }} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-170/optimizer.pt b/checkpoints/checkpoint-170/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e4ab31820459b003d5f1d7c7814dad3fb922b40c --- /dev/null +++ b/checkpoints/checkpoint-170/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e16dffa63a287bbb7667e0a4d75f8206ba145902764402d4f4b490b6b098f75c +size 38889 diff --git a/checkpoints/checkpoint-170/rng_state.pth b/checkpoints/checkpoint-170/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..85d681a34283ea87d8ee31827a25caa7feffbf71 --- /dev/null +++ b/checkpoints/checkpoint-170/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cde881b24af86d8870de73e5328bf8754c9e262c58bcd875cf451eca0e6671d5 +size 14581 diff --git a/checkpoints/checkpoint-170/scheduler.pt b/checkpoints/checkpoint-170/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..aef779fcd067bf2570c89eab1378929c850cc9d2 --- /dev/null +++ b/checkpoints/checkpoint-170/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f389aeb1721c25a07db709896a50e83edee3bd6414e4d22ad9a4e315cf219c2a +size 1465 diff --git a/checkpoints/checkpoint-170/special_tokens_map.json b/checkpoints/checkpoint-170/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-170/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-170/tokenizer.json b/checkpoints/checkpoint-170/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-170/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-170/tokenizer_config.json b/checkpoints/checkpoint-170/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-170/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-170/trainer_state.json b/checkpoints/checkpoint-170/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d704be3d6e5f775311977aa87196e17b04b03de9 --- /dev/null +++ b/checkpoints/checkpoint-170/trainer_state.json @@ -0,0 +1,1232 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.4287515762925599, + "eval_steps": 100, + "global_step": 170, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 25.350475311279297, + "learning_rate": 0.0, + "loss": 2.5568, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 24.538068771362305, + "learning_rate": 4.000000000000001e-06, + "loss": 2.7748, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 23.780784606933594, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5911, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 24.780380249023438, + "learning_rate": 1.2e-05, + "loss": 2.8427, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 22.699949264526367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.709, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 22.106008529663086, + "learning_rate": 2e-05, + "loss": 2.5854, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 22.497045516967773, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.5427, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 27.103275299072266, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.6599, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 21.081985473632812, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.5145, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 25.964981079101562, + "learning_rate": 1.979591836734694e-05, + "loss": 2.4247, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 25.353195190429688, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.5092, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 18.94191551208496, + "learning_rate": 1.969387755102041e-05, + "loss": 2.4335, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 23.60140037536621, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.544, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 24.298965454101562, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.4987, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.745506286621094, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.4985, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 22.54330062866211, + "learning_rate": 1.948979591836735e-05, + "loss": 2.6892, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 21.46229362487793, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.3998, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 20.54530143737793, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.4244, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 18.8839111328125, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.3911, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.924652099609375, + "learning_rate": 1.928571428571429e-05, + "loss": 2.3588, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.996627807617188, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.3727, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 18.584613800048828, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2974, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 14.309200286865234, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.4843, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.074164390563965, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.4043, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 13.610542297363281, + "learning_rate": 1.903061224489796e-05, + "loss": 2.3762, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 15.666613578796387, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.3249, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 14.475164413452148, + "learning_rate": 1.892857142857143e-05, + "loss": 2.3317, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 16.231687545776367, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.5064, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 16.8968563079834, + "learning_rate": 1.88265306122449e-05, + "loss": 2.3932, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 17.74305534362793, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.3329, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 16.41620445251465, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.3982, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 17.965959548950195, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.4227, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 19.92589569091797, + "learning_rate": 1.862244897959184e-05, + "loss": 2.5255, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 20.62932586669922, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.1816, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 18.360614776611328, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.2827, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 19.199546813964844, + "learning_rate": 1.8469387755102043e-05, + "loss": 2.1498, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 22.727521896362305, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.3811, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 19.80649757385254, + "learning_rate": 1.836734693877551e-05, + "loss": 2.2342, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.24563217163086, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.2287, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 25.384042739868164, + "learning_rate": 1.826530612244898e-05, + "loss": 2.1259, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 23.417089462280273, + "learning_rate": 1.8214285714285715e-05, + "loss": 2.0858, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 27.639497756958008, + "learning_rate": 1.816326530612245e-05, + "loss": 2.2243, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 27.390850067138672, + "learning_rate": 1.8112244897959187e-05, + "loss": 2.1314, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 27.956937789916992, + "learning_rate": 1.806122448979592e-05, + "loss": 2.1755, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 32.09632873535156, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.2365, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 33.84647750854492, + "learning_rate": 1.795918367346939e-05, + "loss": 2.1671, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 32.027130126953125, + "learning_rate": 1.7908163265306123e-05, + "loss": 2.09, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 35.423587799072266, + "learning_rate": 1.785714285714286e-05, + "loss": 2.2479, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 31.041240692138672, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9687, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 28.790103912353516, + "learning_rate": 1.7755102040816327e-05, + "loss": 2.1428, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.089313507080078, + "learning_rate": 1.7704081632653062e-05, + "loss": 2.0673, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 26.493867874145508, + "learning_rate": 1.7653061224489798e-05, + "loss": 2.0814, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 19.993173599243164, + "learning_rate": 1.760204081632653e-05, + "loss": 2.005, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 21.89765167236328, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.2262, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 23.22844123840332, + "learning_rate": 1.7500000000000002e-05, + "loss": 2.0208, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 15.864526748657227, + "learning_rate": 1.7448979591836738e-05, + "loss": 2.0153, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 21.451187133789062, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.1386, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 18.089811325073242, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.9517, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 24.029157638549805, + "learning_rate": 1.729591836734694e-05, + "loss": 1.9719, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 18.722776412963867, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0623, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 20.211933135986328, + "learning_rate": 1.719387755102041e-05, + "loss": 2.0081, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 17.61188507080078, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.8484, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 20.118955612182617, + "learning_rate": 1.7091836734693878e-05, + "loss": 2.0799, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 17.271841049194336, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.9832, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 19.521392822265625, + "learning_rate": 1.698979591836735e-05, + "loss": 1.9129, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 22.660900115966797, + "learning_rate": 1.6938775510204085e-05, + "loss": 2.118, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 17.332427978515625, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.9632, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 22.42765998840332, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.954, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 23.6208553314209, + "learning_rate": 1.678571428571429e-05, + "loss": 1.9917, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 19.78505516052246, + "learning_rate": 1.673469387755102e-05, + "loss": 1.7964, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 19.453041076660156, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.9587, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 24.731407165527344, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.9945, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 20.977611541748047, + "learning_rate": 1.6581632653061225e-05, + "loss": 2.0617, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 22.959585189819336, + "learning_rate": 1.653061224489796e-05, + "loss": 1.98, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 21.952653884887695, + "learning_rate": 1.6479591836734696e-05, + "loss": 2.1094, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 22.320383071899414, + "learning_rate": 1.642857142857143e-05, + "loss": 1.8418, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 24.375411987304688, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.8428, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 19.64323616027832, + "learning_rate": 1.63265306122449e-05, + "loss": 1.9194, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 22.459064483642578, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.6649, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 36.789764404296875, + "learning_rate": 1.6224489795918368e-05, + "loss": 2.0131, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 22.109119415283203, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.9603, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 19.196834564208984, + "learning_rate": 1.612244897959184e-05, + "loss": 2.0538, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 26.870800018310547, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.9168, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 35.190696716308594, + "learning_rate": 1.6020408163265308e-05, + "loss": 2.0149, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 19.963472366333008, + "learning_rate": 1.596938775510204e-05, + "loss": 1.7871, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 20.292407989501953, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.944, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 20.55329132080078, + "learning_rate": 1.586734693877551e-05, + "loss": 2.0175, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 17.27350616455078, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.9308, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 22.471134185791016, + "learning_rate": 1.576530612244898e-05, + "loss": 1.9221, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 25.098316192626953, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.9359, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 25.125213623046875, + "learning_rate": 1.566326530612245e-05, + "loss": 2.0087, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 20.038599014282227, + "learning_rate": 1.5612244897959187e-05, + "loss": 2.0939, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 19.016841888427734, + "learning_rate": 1.556122448979592e-05, + "loss": 2.0183, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 21.97820472717285, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.8239, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 25.578901290893555, + "learning_rate": 1.545918367346939e-05, + "loss": 1.9388, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 23.74614143371582, + "learning_rate": 1.5408163265306123e-05, + "loss": 2.0492, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 22.203304290771484, + "learning_rate": 1.535714285714286e-05, + "loss": 1.941, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 21.39324188232422, + "learning_rate": 1.530612244897959e-05, + "loss": 1.9042, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 18.99315643310547, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.88, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 24.22341537475586, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.8147, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.8966256380081177, + "eval_runtime": 6.9787, + "eval_samples_per_second": 101.022, + "eval_steps_per_second": 50.583, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 18.296152114868164, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.8605, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 26.404766082763672, + "learning_rate": 1.510204081632653e-05, + "loss": 2.1195, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 19.187122344970703, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.9284, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 20.79934310913086, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.725, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 23.833288192749023, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.9215, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 22.301727294921875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.8728, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 23.685596466064453, + "learning_rate": 1.4846938775510204e-05, + "loss": 2.0482, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 18.969186782836914, + "learning_rate": 1.479591836734694e-05, + "loss": 1.8724, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 23.994483947753906, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.9542, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 16.84621238708496, + "learning_rate": 1.469387755102041e-05, + "loss": 1.9703, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 23.411087036132812, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.9836, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 29.55487632751465, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.9124, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 32.28921127319336, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.8566, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 24.007558822631836, + "learning_rate": 1.448979591836735e-05, + "loss": 1.8296, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 26.753524780273438, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.7181, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 22.49270248413086, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.8741, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 28.006656646728516, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.9151, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 17.606775283813477, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.9654, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 29.94802474975586, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.8849, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 28.27743148803711, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.8006, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 19.11652183532715, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.8539, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 24.255807876586914, + "learning_rate": 1.4081632653061225e-05, + "loss": 2.0162, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 22.508352279663086, + "learning_rate": 1.403061224489796e-05, + "loss": 1.858, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 27.028772354125977, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.8175, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 22.697704315185547, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.9789, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 31.604068756103516, + "learning_rate": 1.3877551020408165e-05, + "loss": 2.0039, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 27.71053695678711, + "learning_rate": 1.38265306122449e-05, + "loss": 1.9867, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 17.37586784362793, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.6931, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 20.28536605834961, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.9199, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 29.4377384185791, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.9322, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 17.703046798706055, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.8842, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 30.14008140563965, + "learning_rate": 1.3571428571428574e-05, + "loss": 2.1228, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 29.657262802124023, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.8929, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 18.243854522705078, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.8811, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 33.22247314453125, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.9814, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 26.413856506347656, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.9329, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 20.56089210510254, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.8944, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 19.480737686157227, + "learning_rate": 1.326530612244898e-05, + "loss": 1.9221, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 22.788074493408203, + "learning_rate": 1.3214285714285716e-05, + "loss": 2.0445, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 20.722291946411133, + "learning_rate": 1.316326530612245e-05, + "loss": 1.9998, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 25.190189361572266, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.8076, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 23.203886032104492, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.7821, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 25.32374382019043, + "learning_rate": 1.3010204081632653e-05, + "loss": 2.0356, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 28.798864364624023, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.8202, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 24.93810272216797, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.9237, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 36.78353500366211, + "learning_rate": 1.2857142857142859e-05, + "loss": 2.0019, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 28.510663986206055, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.9268, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 38.19087219238281, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.9366, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 20.796728134155273, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.8731, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 23.036758422851562, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.8835, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 27.058195114135742, + "learning_rate": 1.260204081632653e-05, + "loss": 1.8013, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 25.390460968017578, + "learning_rate": 1.2551020408163267e-05, + "loss": 2.0623, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 27.993654251098633, + "learning_rate": 1.25e-05, + "loss": 1.6895, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 24.15807342529297, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.9799, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 24.369815826416016, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.9687, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 24.572988510131836, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.8607, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 20.491390228271484, + "learning_rate": 1.229591836734694e-05, + "loss": 2.0677, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 25.128101348876953, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.9233, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 18.843276977539062, + "learning_rate": 1.219387755102041e-05, + "loss": 1.781, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 24.99994659423828, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.962, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 20.679218292236328, + "learning_rate": 1.2091836734693878e-05, + "loss": 2.0055, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 26.00550651550293, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.9761, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 33.80900192260742, + "learning_rate": 1.1989795918367348e-05, + "loss": 2.0502, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 25.639009475708008, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.9088, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 17.48627471923828, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.9359, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 23.16074562072754, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.8647, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 25.39946174621582, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.8523, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 25.8050537109375, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.8403, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 20.019033432006836, + "learning_rate": 1.1683673469387755e-05, + "loss": 2.0023, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 26.194847106933594, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.9429, + "step": 170 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2475032442667008.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-170/training_args.bin b/checkpoints/checkpoint-170/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..866f8d7e39db21daf30a11de7c14a3ccb8f2e9aa --- /dev/null +++ b/checkpoints/checkpoint-170/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:591380c2089627b1aa728bfd52abdb50afeaafc7a7f48353dede4e443fc370d0 +size 5969 diff --git a/checkpoints/checkpoint-180/README.md b/checkpoints/checkpoint-180/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b974a935220f493e52a3da346206a652479b4735 --- /dev/null +++ b/checkpoints/checkpoint-180/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.2-1B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-180/adapter_config.json b/checkpoints/checkpoint-180/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a0d31afa2216e2d6d2237fc15eef0c8c03ca674 --- /dev/null +++ b/checkpoints/checkpoint-180/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.2-1B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-180/adapter_model.safetensors b/checkpoints/checkpoint-180/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4371c39a0c27d0492105f357fdb179212d7e109d --- /dev/null +++ b/checkpoints/checkpoint-180/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42a5094bebeb9e4f14f3ebeec1ba700447428c2df067d53a2ee296657c598827 +size 41248 diff --git a/checkpoints/checkpoint-180/chat_template.jinja b/checkpoints/checkpoint-180/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..1bad6a0f648dccdbec523ca79ba90fbcfc806af0 --- /dev/null +++ b/checkpoints/checkpoint-180/chat_template.jinja @@ -0,0 +1,93 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- if strftime_now is defined %} + {%- set date_string = strftime_now("%d %b %Y") %} + {%- else %} + {%- set date_string = "26 Jul 2024" %} + {%- endif %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {{- "<|eot_id|>" }} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-180/optimizer.pt b/checkpoints/checkpoint-180/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..91706e04297f3d4fe6844d2ff13a0c65828dbfc6 --- /dev/null +++ b/checkpoints/checkpoint-180/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:897f43b67df7d8261a16cb2fdd492086769820692da2a27c14de48e4b28444d0 +size 38889 diff --git a/checkpoints/checkpoint-180/rng_state.pth b/checkpoints/checkpoint-180/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..85d681a34283ea87d8ee31827a25caa7feffbf71 --- /dev/null +++ b/checkpoints/checkpoint-180/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cde881b24af86d8870de73e5328bf8754c9e262c58bcd875cf451eca0e6671d5 +size 14581 diff --git a/checkpoints/checkpoint-180/scheduler.pt b/checkpoints/checkpoint-180/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..0233fae2f0cff9a5bc0c61015b584670f8cf1dfe --- /dev/null +++ b/checkpoints/checkpoint-180/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb8435f832c953ffd48b0dd8c7e72c1be53fea0841335092a2e03d8352ddaf8e +size 1465 diff --git a/checkpoints/checkpoint-180/special_tokens_map.json b/checkpoints/checkpoint-180/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-180/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-180/tokenizer.json b/checkpoints/checkpoint-180/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-180/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-180/tokenizer_config.json b/checkpoints/checkpoint-180/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-180/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-180/trainer_state.json b/checkpoints/checkpoint-180/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b6ec45d9214d2e876ee87f8e969531534d791df7 --- /dev/null +++ b/checkpoints/checkpoint-180/trainer_state.json @@ -0,0 +1,1302 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.45397225725094575, + "eval_steps": 100, + "global_step": 180, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 25.350475311279297, + "learning_rate": 0.0, + "loss": 2.5568, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 24.538068771362305, + "learning_rate": 4.000000000000001e-06, + "loss": 2.7748, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 23.780784606933594, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5911, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 24.780380249023438, + "learning_rate": 1.2e-05, + "loss": 2.8427, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 22.699949264526367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.709, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 22.106008529663086, + "learning_rate": 2e-05, + "loss": 2.5854, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 22.497045516967773, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.5427, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 27.103275299072266, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.6599, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 21.081985473632812, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.5145, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 25.964981079101562, + "learning_rate": 1.979591836734694e-05, + "loss": 2.4247, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 25.353195190429688, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.5092, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 18.94191551208496, + "learning_rate": 1.969387755102041e-05, + "loss": 2.4335, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 23.60140037536621, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.544, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 24.298965454101562, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.4987, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.745506286621094, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.4985, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 22.54330062866211, + "learning_rate": 1.948979591836735e-05, + "loss": 2.6892, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 21.46229362487793, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.3998, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 20.54530143737793, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.4244, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 18.8839111328125, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.3911, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.924652099609375, + "learning_rate": 1.928571428571429e-05, + "loss": 2.3588, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.996627807617188, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.3727, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 18.584613800048828, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2974, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 14.309200286865234, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.4843, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.074164390563965, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.4043, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 13.610542297363281, + "learning_rate": 1.903061224489796e-05, + "loss": 2.3762, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 15.666613578796387, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.3249, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 14.475164413452148, + "learning_rate": 1.892857142857143e-05, + "loss": 2.3317, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 16.231687545776367, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.5064, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 16.8968563079834, + "learning_rate": 1.88265306122449e-05, + "loss": 2.3932, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 17.74305534362793, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.3329, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 16.41620445251465, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.3982, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 17.965959548950195, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.4227, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 19.92589569091797, + "learning_rate": 1.862244897959184e-05, + "loss": 2.5255, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 20.62932586669922, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.1816, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 18.360614776611328, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.2827, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 19.199546813964844, + "learning_rate": 1.8469387755102043e-05, + "loss": 2.1498, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 22.727521896362305, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.3811, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 19.80649757385254, + "learning_rate": 1.836734693877551e-05, + "loss": 2.2342, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.24563217163086, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.2287, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 25.384042739868164, + "learning_rate": 1.826530612244898e-05, + "loss": 2.1259, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 23.417089462280273, + "learning_rate": 1.8214285714285715e-05, + "loss": 2.0858, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 27.639497756958008, + "learning_rate": 1.816326530612245e-05, + "loss": 2.2243, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 27.390850067138672, + "learning_rate": 1.8112244897959187e-05, + "loss": 2.1314, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 27.956937789916992, + "learning_rate": 1.806122448979592e-05, + "loss": 2.1755, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 32.09632873535156, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.2365, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 33.84647750854492, + "learning_rate": 1.795918367346939e-05, + "loss": 2.1671, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 32.027130126953125, + "learning_rate": 1.7908163265306123e-05, + "loss": 2.09, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 35.423587799072266, + "learning_rate": 1.785714285714286e-05, + "loss": 2.2479, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 31.041240692138672, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9687, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 28.790103912353516, + "learning_rate": 1.7755102040816327e-05, + "loss": 2.1428, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.089313507080078, + "learning_rate": 1.7704081632653062e-05, + "loss": 2.0673, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 26.493867874145508, + "learning_rate": 1.7653061224489798e-05, + "loss": 2.0814, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 19.993173599243164, + "learning_rate": 1.760204081632653e-05, + "loss": 2.005, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 21.89765167236328, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.2262, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 23.22844123840332, + "learning_rate": 1.7500000000000002e-05, + "loss": 2.0208, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 15.864526748657227, + "learning_rate": 1.7448979591836738e-05, + "loss": 2.0153, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 21.451187133789062, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.1386, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 18.089811325073242, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.9517, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 24.029157638549805, + "learning_rate": 1.729591836734694e-05, + "loss": 1.9719, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 18.722776412963867, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0623, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 20.211933135986328, + "learning_rate": 1.719387755102041e-05, + "loss": 2.0081, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 17.61188507080078, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.8484, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 20.118955612182617, + "learning_rate": 1.7091836734693878e-05, + "loss": 2.0799, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 17.271841049194336, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.9832, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 19.521392822265625, + "learning_rate": 1.698979591836735e-05, + "loss": 1.9129, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 22.660900115966797, + "learning_rate": 1.6938775510204085e-05, + "loss": 2.118, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 17.332427978515625, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.9632, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 22.42765998840332, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.954, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 23.6208553314209, + "learning_rate": 1.678571428571429e-05, + "loss": 1.9917, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 19.78505516052246, + "learning_rate": 1.673469387755102e-05, + "loss": 1.7964, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 19.453041076660156, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.9587, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 24.731407165527344, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.9945, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 20.977611541748047, + "learning_rate": 1.6581632653061225e-05, + "loss": 2.0617, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 22.959585189819336, + "learning_rate": 1.653061224489796e-05, + "loss": 1.98, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 21.952653884887695, + "learning_rate": 1.6479591836734696e-05, + "loss": 2.1094, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 22.320383071899414, + "learning_rate": 1.642857142857143e-05, + "loss": 1.8418, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 24.375411987304688, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.8428, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 19.64323616027832, + "learning_rate": 1.63265306122449e-05, + "loss": 1.9194, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 22.459064483642578, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.6649, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 36.789764404296875, + "learning_rate": 1.6224489795918368e-05, + "loss": 2.0131, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 22.109119415283203, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.9603, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 19.196834564208984, + "learning_rate": 1.612244897959184e-05, + "loss": 2.0538, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 26.870800018310547, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.9168, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 35.190696716308594, + "learning_rate": 1.6020408163265308e-05, + "loss": 2.0149, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 19.963472366333008, + "learning_rate": 1.596938775510204e-05, + "loss": 1.7871, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 20.292407989501953, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.944, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 20.55329132080078, + "learning_rate": 1.586734693877551e-05, + "loss": 2.0175, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 17.27350616455078, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.9308, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 22.471134185791016, + "learning_rate": 1.576530612244898e-05, + "loss": 1.9221, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 25.098316192626953, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.9359, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 25.125213623046875, + "learning_rate": 1.566326530612245e-05, + "loss": 2.0087, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 20.038599014282227, + "learning_rate": 1.5612244897959187e-05, + "loss": 2.0939, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 19.016841888427734, + "learning_rate": 1.556122448979592e-05, + "loss": 2.0183, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 21.97820472717285, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.8239, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 25.578901290893555, + "learning_rate": 1.545918367346939e-05, + "loss": 1.9388, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 23.74614143371582, + "learning_rate": 1.5408163265306123e-05, + "loss": 2.0492, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 22.203304290771484, + "learning_rate": 1.535714285714286e-05, + "loss": 1.941, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 21.39324188232422, + "learning_rate": 1.530612244897959e-05, + "loss": 1.9042, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 18.99315643310547, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.88, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 24.22341537475586, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.8147, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.8966256380081177, + "eval_runtime": 6.9787, + "eval_samples_per_second": 101.022, + "eval_steps_per_second": 50.583, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 18.296152114868164, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.8605, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 26.404766082763672, + "learning_rate": 1.510204081632653e-05, + "loss": 2.1195, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 19.187122344970703, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.9284, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 20.79934310913086, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.725, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 23.833288192749023, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.9215, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 22.301727294921875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.8728, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 23.685596466064453, + "learning_rate": 1.4846938775510204e-05, + "loss": 2.0482, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 18.969186782836914, + "learning_rate": 1.479591836734694e-05, + "loss": 1.8724, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 23.994483947753906, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.9542, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 16.84621238708496, + "learning_rate": 1.469387755102041e-05, + "loss": 1.9703, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 23.411087036132812, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.9836, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 29.55487632751465, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.9124, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 32.28921127319336, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.8566, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 24.007558822631836, + "learning_rate": 1.448979591836735e-05, + "loss": 1.8296, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 26.753524780273438, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.7181, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 22.49270248413086, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.8741, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 28.006656646728516, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.9151, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 17.606775283813477, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.9654, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 29.94802474975586, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.8849, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 28.27743148803711, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.8006, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 19.11652183532715, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.8539, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 24.255807876586914, + "learning_rate": 1.4081632653061225e-05, + "loss": 2.0162, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 22.508352279663086, + "learning_rate": 1.403061224489796e-05, + "loss": 1.858, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 27.028772354125977, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.8175, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 22.697704315185547, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.9789, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 31.604068756103516, + "learning_rate": 1.3877551020408165e-05, + "loss": 2.0039, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 27.71053695678711, + "learning_rate": 1.38265306122449e-05, + "loss": 1.9867, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 17.37586784362793, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.6931, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 20.28536605834961, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.9199, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 29.4377384185791, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.9322, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 17.703046798706055, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.8842, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 30.14008140563965, + "learning_rate": 1.3571428571428574e-05, + "loss": 2.1228, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 29.657262802124023, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.8929, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 18.243854522705078, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.8811, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 33.22247314453125, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.9814, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 26.413856506347656, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.9329, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 20.56089210510254, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.8944, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 19.480737686157227, + "learning_rate": 1.326530612244898e-05, + "loss": 1.9221, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 22.788074493408203, + "learning_rate": 1.3214285714285716e-05, + "loss": 2.0445, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 20.722291946411133, + "learning_rate": 1.316326530612245e-05, + "loss": 1.9998, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 25.190189361572266, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.8076, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 23.203886032104492, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.7821, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 25.32374382019043, + "learning_rate": 1.3010204081632653e-05, + "loss": 2.0356, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 28.798864364624023, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.8202, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 24.93810272216797, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.9237, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 36.78353500366211, + "learning_rate": 1.2857142857142859e-05, + "loss": 2.0019, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 28.510663986206055, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.9268, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 38.19087219238281, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.9366, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 20.796728134155273, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.8731, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 23.036758422851562, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.8835, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 27.058195114135742, + "learning_rate": 1.260204081632653e-05, + "loss": 1.8013, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 25.390460968017578, + "learning_rate": 1.2551020408163267e-05, + "loss": 2.0623, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 27.993654251098633, + "learning_rate": 1.25e-05, + "loss": 1.6895, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 24.15807342529297, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.9799, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 24.369815826416016, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.9687, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 24.572988510131836, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.8607, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 20.491390228271484, + "learning_rate": 1.229591836734694e-05, + "loss": 2.0677, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 25.128101348876953, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.9233, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 18.843276977539062, + "learning_rate": 1.219387755102041e-05, + "loss": 1.781, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 24.99994659423828, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.962, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 20.679218292236328, + "learning_rate": 1.2091836734693878e-05, + "loss": 2.0055, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 26.00550651550293, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.9761, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 33.80900192260742, + "learning_rate": 1.1989795918367348e-05, + "loss": 2.0502, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 25.639009475708008, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.9088, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 17.48627471923828, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.9359, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 23.16074562072754, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.8647, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 25.39946174621582, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.8523, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 25.8050537109375, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.8403, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 20.019033432006836, + "learning_rate": 1.1683673469387755e-05, + "loss": 2.0023, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 26.194847106933594, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.9429, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 21.064212799072266, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.8302, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 21.876129150390625, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.8881, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 33.61103439331055, + "learning_rate": 1.1479591836734697e-05, + "loss": 2.0497, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 27.204744338989258, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.8431, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 21.605751037597656, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.9149, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 30.307472229003906, + "learning_rate": 1.1326530612244899e-05, + "loss": 2.0313, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 23.69244384765625, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.8676, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 25.619901657104492, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.7905, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 28.0296573638916, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.8567, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 36.4359130859375, + "learning_rate": 1.1122448979591838e-05, + "loss": 2.115, + "step": 180 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2622302139727872.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-180/training_args.bin b/checkpoints/checkpoint-180/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..866f8d7e39db21daf30a11de7c14a3ccb8f2e9aa --- /dev/null +++ b/checkpoints/checkpoint-180/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:591380c2089627b1aa728bfd52abdb50afeaafc7a7f48353dede4e443fc370d0 +size 5969 diff --git a/checkpoints/checkpoint-190/README.md b/checkpoints/checkpoint-190/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b974a935220f493e52a3da346206a652479b4735 --- /dev/null +++ b/checkpoints/checkpoint-190/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.2-1B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-190/adapter_config.json b/checkpoints/checkpoint-190/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a0d31afa2216e2d6d2237fc15eef0c8c03ca674 --- /dev/null +++ b/checkpoints/checkpoint-190/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.2-1B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-190/adapter_model.safetensors b/checkpoints/checkpoint-190/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e9d6af40ae719631867dc959ab3dc656b50e701b --- /dev/null +++ b/checkpoints/checkpoint-190/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d61f492b2f11e88f95605af1df5424aad47331e6dfd549824a96d33d7ce9502c +size 41248 diff --git a/checkpoints/checkpoint-190/chat_template.jinja b/checkpoints/checkpoint-190/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..1bad6a0f648dccdbec523ca79ba90fbcfc806af0 --- /dev/null +++ b/checkpoints/checkpoint-190/chat_template.jinja @@ -0,0 +1,93 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- if strftime_now is defined %} + {%- set date_string = strftime_now("%d %b %Y") %} + {%- else %} + {%- set date_string = "26 Jul 2024" %} + {%- endif %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {{- "<|eot_id|>" }} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-190/optimizer.pt b/checkpoints/checkpoint-190/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..42396ee8e23f1c0477ca065ac34fe3da08b2fa12 --- /dev/null +++ b/checkpoints/checkpoint-190/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc46755f30bc92063150ea35c7b35a770a73d11d566c77d416c50bfd24ffb415 +size 38889 diff --git a/checkpoints/checkpoint-190/rng_state.pth b/checkpoints/checkpoint-190/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..85d681a34283ea87d8ee31827a25caa7feffbf71 --- /dev/null +++ b/checkpoints/checkpoint-190/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cde881b24af86d8870de73e5328bf8754c9e262c58bcd875cf451eca0e6671d5 +size 14581 diff --git a/checkpoints/checkpoint-190/scheduler.pt b/checkpoints/checkpoint-190/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a63cd71a12f1704f4cf910b01dc4d0ccac9e3e67 --- /dev/null +++ b/checkpoints/checkpoint-190/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d20bfee158d956b1a68ab8913dc87d6e67c838b80c163a0e813f60abce40427 +size 1465 diff --git a/checkpoints/checkpoint-190/special_tokens_map.json b/checkpoints/checkpoint-190/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-190/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-190/tokenizer.json b/checkpoints/checkpoint-190/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-190/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-190/tokenizer_config.json b/checkpoints/checkpoint-190/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-190/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-190/trainer_state.json b/checkpoints/checkpoint-190/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f44dff01c5368dd3cf464b8a1bfbe43e026c8584 --- /dev/null +++ b/checkpoints/checkpoint-190/trainer_state.json @@ -0,0 +1,1372 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.4791929382093317, + "eval_steps": 100, + "global_step": 190, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 25.350475311279297, + "learning_rate": 0.0, + "loss": 2.5568, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 24.538068771362305, + "learning_rate": 4.000000000000001e-06, + "loss": 2.7748, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 23.780784606933594, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5911, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 24.780380249023438, + "learning_rate": 1.2e-05, + "loss": 2.8427, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 22.699949264526367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.709, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 22.106008529663086, + "learning_rate": 2e-05, + "loss": 2.5854, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 22.497045516967773, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.5427, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 27.103275299072266, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.6599, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 21.081985473632812, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.5145, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 25.964981079101562, + "learning_rate": 1.979591836734694e-05, + "loss": 2.4247, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 25.353195190429688, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.5092, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 18.94191551208496, + "learning_rate": 1.969387755102041e-05, + "loss": 2.4335, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 23.60140037536621, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.544, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 24.298965454101562, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.4987, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.745506286621094, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.4985, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 22.54330062866211, + "learning_rate": 1.948979591836735e-05, + "loss": 2.6892, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 21.46229362487793, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.3998, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 20.54530143737793, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.4244, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 18.8839111328125, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.3911, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.924652099609375, + "learning_rate": 1.928571428571429e-05, + "loss": 2.3588, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.996627807617188, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.3727, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 18.584613800048828, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2974, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 14.309200286865234, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.4843, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.074164390563965, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.4043, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 13.610542297363281, + "learning_rate": 1.903061224489796e-05, + "loss": 2.3762, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 15.666613578796387, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.3249, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 14.475164413452148, + "learning_rate": 1.892857142857143e-05, + "loss": 2.3317, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 16.231687545776367, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.5064, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 16.8968563079834, + "learning_rate": 1.88265306122449e-05, + "loss": 2.3932, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 17.74305534362793, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.3329, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 16.41620445251465, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.3982, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 17.965959548950195, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.4227, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 19.92589569091797, + "learning_rate": 1.862244897959184e-05, + "loss": 2.5255, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 20.62932586669922, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.1816, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 18.360614776611328, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.2827, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 19.199546813964844, + "learning_rate": 1.8469387755102043e-05, + "loss": 2.1498, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 22.727521896362305, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.3811, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 19.80649757385254, + "learning_rate": 1.836734693877551e-05, + "loss": 2.2342, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.24563217163086, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.2287, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 25.384042739868164, + "learning_rate": 1.826530612244898e-05, + "loss": 2.1259, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 23.417089462280273, + "learning_rate": 1.8214285714285715e-05, + "loss": 2.0858, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 27.639497756958008, + "learning_rate": 1.816326530612245e-05, + "loss": 2.2243, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 27.390850067138672, + "learning_rate": 1.8112244897959187e-05, + "loss": 2.1314, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 27.956937789916992, + "learning_rate": 1.806122448979592e-05, + "loss": 2.1755, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 32.09632873535156, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.2365, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 33.84647750854492, + "learning_rate": 1.795918367346939e-05, + "loss": 2.1671, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 32.027130126953125, + "learning_rate": 1.7908163265306123e-05, + "loss": 2.09, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 35.423587799072266, + "learning_rate": 1.785714285714286e-05, + "loss": 2.2479, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 31.041240692138672, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9687, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 28.790103912353516, + "learning_rate": 1.7755102040816327e-05, + "loss": 2.1428, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.089313507080078, + "learning_rate": 1.7704081632653062e-05, + "loss": 2.0673, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 26.493867874145508, + "learning_rate": 1.7653061224489798e-05, + "loss": 2.0814, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 19.993173599243164, + "learning_rate": 1.760204081632653e-05, + "loss": 2.005, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 21.89765167236328, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.2262, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 23.22844123840332, + "learning_rate": 1.7500000000000002e-05, + "loss": 2.0208, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 15.864526748657227, + "learning_rate": 1.7448979591836738e-05, + "loss": 2.0153, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 21.451187133789062, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.1386, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 18.089811325073242, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.9517, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 24.029157638549805, + "learning_rate": 1.729591836734694e-05, + "loss": 1.9719, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 18.722776412963867, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0623, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 20.211933135986328, + "learning_rate": 1.719387755102041e-05, + "loss": 2.0081, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 17.61188507080078, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.8484, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 20.118955612182617, + "learning_rate": 1.7091836734693878e-05, + "loss": 2.0799, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 17.271841049194336, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.9832, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 19.521392822265625, + "learning_rate": 1.698979591836735e-05, + "loss": 1.9129, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 22.660900115966797, + "learning_rate": 1.6938775510204085e-05, + "loss": 2.118, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 17.332427978515625, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.9632, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 22.42765998840332, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.954, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 23.6208553314209, + "learning_rate": 1.678571428571429e-05, + "loss": 1.9917, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 19.78505516052246, + "learning_rate": 1.673469387755102e-05, + "loss": 1.7964, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 19.453041076660156, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.9587, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 24.731407165527344, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.9945, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 20.977611541748047, + "learning_rate": 1.6581632653061225e-05, + "loss": 2.0617, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 22.959585189819336, + "learning_rate": 1.653061224489796e-05, + "loss": 1.98, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 21.952653884887695, + "learning_rate": 1.6479591836734696e-05, + "loss": 2.1094, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 22.320383071899414, + "learning_rate": 1.642857142857143e-05, + "loss": 1.8418, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 24.375411987304688, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.8428, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 19.64323616027832, + "learning_rate": 1.63265306122449e-05, + "loss": 1.9194, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 22.459064483642578, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.6649, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 36.789764404296875, + "learning_rate": 1.6224489795918368e-05, + "loss": 2.0131, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 22.109119415283203, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.9603, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 19.196834564208984, + "learning_rate": 1.612244897959184e-05, + "loss": 2.0538, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 26.870800018310547, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.9168, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 35.190696716308594, + "learning_rate": 1.6020408163265308e-05, + "loss": 2.0149, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 19.963472366333008, + "learning_rate": 1.596938775510204e-05, + "loss": 1.7871, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 20.292407989501953, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.944, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 20.55329132080078, + "learning_rate": 1.586734693877551e-05, + "loss": 2.0175, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 17.27350616455078, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.9308, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 22.471134185791016, + "learning_rate": 1.576530612244898e-05, + "loss": 1.9221, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 25.098316192626953, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.9359, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 25.125213623046875, + "learning_rate": 1.566326530612245e-05, + "loss": 2.0087, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 20.038599014282227, + "learning_rate": 1.5612244897959187e-05, + "loss": 2.0939, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 19.016841888427734, + "learning_rate": 1.556122448979592e-05, + "loss": 2.0183, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 21.97820472717285, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.8239, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 25.578901290893555, + "learning_rate": 1.545918367346939e-05, + "loss": 1.9388, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 23.74614143371582, + "learning_rate": 1.5408163265306123e-05, + "loss": 2.0492, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 22.203304290771484, + "learning_rate": 1.535714285714286e-05, + "loss": 1.941, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 21.39324188232422, + "learning_rate": 1.530612244897959e-05, + "loss": 1.9042, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 18.99315643310547, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.88, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 24.22341537475586, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.8147, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.8966256380081177, + "eval_runtime": 6.9787, + "eval_samples_per_second": 101.022, + "eval_steps_per_second": 50.583, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 18.296152114868164, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.8605, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 26.404766082763672, + "learning_rate": 1.510204081632653e-05, + "loss": 2.1195, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 19.187122344970703, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.9284, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 20.79934310913086, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.725, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 23.833288192749023, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.9215, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 22.301727294921875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.8728, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 23.685596466064453, + "learning_rate": 1.4846938775510204e-05, + "loss": 2.0482, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 18.969186782836914, + "learning_rate": 1.479591836734694e-05, + "loss": 1.8724, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 23.994483947753906, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.9542, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 16.84621238708496, + "learning_rate": 1.469387755102041e-05, + "loss": 1.9703, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 23.411087036132812, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.9836, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 29.55487632751465, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.9124, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 32.28921127319336, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.8566, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 24.007558822631836, + "learning_rate": 1.448979591836735e-05, + "loss": 1.8296, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 26.753524780273438, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.7181, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 22.49270248413086, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.8741, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 28.006656646728516, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.9151, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 17.606775283813477, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.9654, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 29.94802474975586, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.8849, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 28.27743148803711, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.8006, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 19.11652183532715, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.8539, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 24.255807876586914, + "learning_rate": 1.4081632653061225e-05, + "loss": 2.0162, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 22.508352279663086, + "learning_rate": 1.403061224489796e-05, + "loss": 1.858, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 27.028772354125977, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.8175, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 22.697704315185547, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.9789, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 31.604068756103516, + "learning_rate": 1.3877551020408165e-05, + "loss": 2.0039, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 27.71053695678711, + "learning_rate": 1.38265306122449e-05, + "loss": 1.9867, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 17.37586784362793, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.6931, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 20.28536605834961, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.9199, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 29.4377384185791, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.9322, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 17.703046798706055, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.8842, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 30.14008140563965, + "learning_rate": 1.3571428571428574e-05, + "loss": 2.1228, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 29.657262802124023, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.8929, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 18.243854522705078, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.8811, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 33.22247314453125, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.9814, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 26.413856506347656, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.9329, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 20.56089210510254, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.8944, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 19.480737686157227, + "learning_rate": 1.326530612244898e-05, + "loss": 1.9221, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 22.788074493408203, + "learning_rate": 1.3214285714285716e-05, + "loss": 2.0445, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 20.722291946411133, + "learning_rate": 1.316326530612245e-05, + "loss": 1.9998, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 25.190189361572266, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.8076, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 23.203886032104492, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.7821, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 25.32374382019043, + "learning_rate": 1.3010204081632653e-05, + "loss": 2.0356, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 28.798864364624023, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.8202, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 24.93810272216797, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.9237, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 36.78353500366211, + "learning_rate": 1.2857142857142859e-05, + "loss": 2.0019, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 28.510663986206055, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.9268, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 38.19087219238281, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.9366, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 20.796728134155273, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.8731, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 23.036758422851562, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.8835, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 27.058195114135742, + "learning_rate": 1.260204081632653e-05, + "loss": 1.8013, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 25.390460968017578, + "learning_rate": 1.2551020408163267e-05, + "loss": 2.0623, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 27.993654251098633, + "learning_rate": 1.25e-05, + "loss": 1.6895, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 24.15807342529297, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.9799, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 24.369815826416016, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.9687, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 24.572988510131836, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.8607, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 20.491390228271484, + "learning_rate": 1.229591836734694e-05, + "loss": 2.0677, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 25.128101348876953, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.9233, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 18.843276977539062, + "learning_rate": 1.219387755102041e-05, + "loss": 1.781, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 24.99994659423828, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.962, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 20.679218292236328, + "learning_rate": 1.2091836734693878e-05, + "loss": 2.0055, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 26.00550651550293, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.9761, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 33.80900192260742, + "learning_rate": 1.1989795918367348e-05, + "loss": 2.0502, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 25.639009475708008, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.9088, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 17.48627471923828, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.9359, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 23.16074562072754, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.8647, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 25.39946174621582, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.8523, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 25.8050537109375, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.8403, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 20.019033432006836, + "learning_rate": 1.1683673469387755e-05, + "loss": 2.0023, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 26.194847106933594, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.9429, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 21.064212799072266, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.8302, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 21.876129150390625, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.8881, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 33.61103439331055, + "learning_rate": 1.1479591836734697e-05, + "loss": 2.0497, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 27.204744338989258, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.8431, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 21.605751037597656, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.9149, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 30.307472229003906, + "learning_rate": 1.1326530612244899e-05, + "loss": 2.0313, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 23.69244384765625, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.8676, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 25.619901657104492, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.7905, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 28.0296573638916, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.8567, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 36.4359130859375, + "learning_rate": 1.1122448979591838e-05, + "loss": 2.115, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 26.91726303100586, + "learning_rate": 1.1071428571428572e-05, + "loss": 2.0642, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 23.085880279541016, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.9109, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 27.870641708374023, + "learning_rate": 1.096938775510204e-05, + "loss": 1.8999, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 32.0672607421875, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.904, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 28.879365921020508, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.7159, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 27.592771530151367, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.8561, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 27.412763595581055, + "learning_rate": 1.076530612244898e-05, + "loss": 1.9282, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 30.12356185913086, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.8726, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 39.9027214050293, + "learning_rate": 1.066326530612245e-05, + "loss": 1.7551, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 30.483945846557617, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.7643, + "step": 190 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2768625928814592.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-190/training_args.bin b/checkpoints/checkpoint-190/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..866f8d7e39db21daf30a11de7c14a3ccb8f2e9aa --- /dev/null +++ b/checkpoints/checkpoint-190/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:591380c2089627b1aa728bfd52abdb50afeaafc7a7f48353dede4e443fc370d0 +size 5969 diff --git a/checkpoints/checkpoint-20/README.md b/checkpoints/checkpoint-20/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b974a935220f493e52a3da346206a652479b4735 --- /dev/null +++ b/checkpoints/checkpoint-20/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.2-1B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-20/adapter_config.json b/checkpoints/checkpoint-20/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a0d31afa2216e2d6d2237fc15eef0c8c03ca674 --- /dev/null +++ b/checkpoints/checkpoint-20/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.2-1B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-20/adapter_model.safetensors b/checkpoints/checkpoint-20/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0378daa8e93a65a538e8eef28aab4c84736d1d60 --- /dev/null +++ b/checkpoints/checkpoint-20/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:137bd06159d3a768715197c2e9f6868e7e0bc103d4469fdaab18f6e16177ea9e +size 41248 diff --git a/checkpoints/checkpoint-20/chat_template.jinja b/checkpoints/checkpoint-20/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..1bad6a0f648dccdbec523ca79ba90fbcfc806af0 --- /dev/null +++ b/checkpoints/checkpoint-20/chat_template.jinja @@ -0,0 +1,93 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- if strftime_now is defined %} + {%- set date_string = strftime_now("%d %b %Y") %} + {%- else %} + {%- set date_string = "26 Jul 2024" %} + {%- endif %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {{- "<|eot_id|>" }} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-20/optimizer.pt b/checkpoints/checkpoint-20/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..7c6858b6fecdf7df91c93f35f3f0564633c552af --- /dev/null +++ b/checkpoints/checkpoint-20/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ca942418f91ece2050b64672e0efc0d005e499cb9de1e32a8315d7008ce9388 +size 38889 diff --git a/checkpoints/checkpoint-20/rng_state.pth b/checkpoints/checkpoint-20/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..449028c1ecc80657ae434a40b29f1f2bdccb9195 --- /dev/null +++ b/checkpoints/checkpoint-20/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12e15e837284f30841feeb4cb11a4ca47e6e0a0d43907e64044c865959176390 +size 14581 diff --git a/checkpoints/checkpoint-20/scheduler.pt b/checkpoints/checkpoint-20/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..53ca2af2a2ea224ddb017520a85c7ae74781d248 --- /dev/null +++ b/checkpoints/checkpoint-20/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70b67669993b1be8afc318a211bb6d47e3c53c613107b1ad38bf27354e307377 +size 1465 diff --git a/checkpoints/checkpoint-20/special_tokens_map.json b/checkpoints/checkpoint-20/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-20/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-20/tokenizer.json b/checkpoints/checkpoint-20/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-20/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-20/tokenizer_config.json b/checkpoints/checkpoint-20/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-20/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-20/trainer_state.json b/checkpoints/checkpoint-20/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..631365432134db484e25d9397532a956c776c810 --- /dev/null +++ b/checkpoints/checkpoint-20/trainer_state.json @@ -0,0 +1,174 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.05044136191677175, + "eval_steps": 100, + "global_step": 20, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 25.350475311279297, + "learning_rate": 0.0, + "loss": 2.5568, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 24.538068771362305, + "learning_rate": 4.000000000000001e-06, + "loss": 2.7748, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 23.780784606933594, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5911, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 24.780380249023438, + "learning_rate": 1.2e-05, + "loss": 2.8427, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 22.699949264526367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.709, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 22.106008529663086, + "learning_rate": 2e-05, + "loss": 2.5854, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 22.497045516967773, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.5427, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 27.103275299072266, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.6599, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 21.081985473632812, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.5145, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 25.964981079101562, + "learning_rate": 1.979591836734694e-05, + "loss": 2.4247, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 25.353195190429688, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.5092, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 18.94191551208496, + "learning_rate": 1.969387755102041e-05, + "loss": 2.4335, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 23.60140037536621, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.544, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 24.298965454101562, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.4987, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.745506286621094, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.4985, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 22.54330062866211, + "learning_rate": 1.948979591836735e-05, + "loss": 2.6892, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 21.46229362487793, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.3998, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 20.54530143737793, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.4244, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 18.8839111328125, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.3911, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.924652099609375, + "learning_rate": 1.928571428571429e-05, + "loss": 2.3588, + "step": 20 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 291584891437056.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-20/training_args.bin b/checkpoints/checkpoint-20/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..866f8d7e39db21daf30a11de7c14a3ccb8f2e9aa --- /dev/null +++ b/checkpoints/checkpoint-20/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:591380c2089627b1aa728bfd52abdb50afeaafc7a7f48353dede4e443fc370d0 +size 5969 diff --git a/checkpoints/checkpoint-200/README.md b/checkpoints/checkpoint-200/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b974a935220f493e52a3da346206a652479b4735 --- /dev/null +++ b/checkpoints/checkpoint-200/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.2-1B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-200/adapter_config.json b/checkpoints/checkpoint-200/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a0d31afa2216e2d6d2237fc15eef0c8c03ca674 --- /dev/null +++ b/checkpoints/checkpoint-200/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.2-1B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-200/adapter_model.safetensors b/checkpoints/checkpoint-200/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ee3d5bcf2bfd447fe3fea0fd6b5d520aadbb131d --- /dev/null +++ b/checkpoints/checkpoint-200/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4598e6833a1c4815a880cae51e13a38f2ab94be2ed91e75aabda7c1945e8e5dd +size 41248 diff --git a/checkpoints/checkpoint-200/chat_template.jinja b/checkpoints/checkpoint-200/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..1bad6a0f648dccdbec523ca79ba90fbcfc806af0 --- /dev/null +++ b/checkpoints/checkpoint-200/chat_template.jinja @@ -0,0 +1,93 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- if strftime_now is defined %} + {%- set date_string = strftime_now("%d %b %Y") %} + {%- else %} + {%- set date_string = "26 Jul 2024" %} + {%- endif %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {{- "<|eot_id|>" }} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-200/optimizer.pt b/checkpoints/checkpoint-200/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0ecdf04ccdff2a3d9a664d70d542b48fc3f80c40 --- /dev/null +++ b/checkpoints/checkpoint-200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a13702a082cec2518c35d8b21e1231a9f9a7234e6f32483a3365fc24f149d137 +size 38889 diff --git a/checkpoints/checkpoint-200/rng_state.pth b/checkpoints/checkpoint-200/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..27efe79200462dc7ec248017016a4968efc39ca1 --- /dev/null +++ b/checkpoints/checkpoint-200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4684d51fc75f779aa7923c633e4c75a716ac73280dc8e9335cb1439e364bde19 +size 14581 diff --git a/checkpoints/checkpoint-200/scheduler.pt b/checkpoints/checkpoint-200/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..9c24af522d593c6cbc8037581398b3e0c4d6a598 --- /dev/null +++ b/checkpoints/checkpoint-200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7becff616ab4b426271452da885f09d6d39a1541d1ce4cd1a32088b9132ca59c +size 1465 diff --git a/checkpoints/checkpoint-200/special_tokens_map.json b/checkpoints/checkpoint-200/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-200/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-200/tokenizer.json b/checkpoints/checkpoint-200/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-200/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-200/tokenizer_config.json b/checkpoints/checkpoint-200/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-200/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-200/trainer_state.json b/checkpoints/checkpoint-200/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..92fa7def4699bbdd331362fece78e8ac81dec207 --- /dev/null +++ b/checkpoints/checkpoint-200/trainer_state.json @@ -0,0 +1,1450 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.5044136191677175, + "eval_steps": 100, + "global_step": 200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 25.350475311279297, + "learning_rate": 0.0, + "loss": 2.5568, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 24.538068771362305, + "learning_rate": 4.000000000000001e-06, + "loss": 2.7748, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 23.780784606933594, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5911, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 24.780380249023438, + "learning_rate": 1.2e-05, + "loss": 2.8427, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 22.699949264526367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.709, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 22.106008529663086, + "learning_rate": 2e-05, + "loss": 2.5854, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 22.497045516967773, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.5427, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 27.103275299072266, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.6599, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 21.081985473632812, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.5145, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 25.964981079101562, + "learning_rate": 1.979591836734694e-05, + "loss": 2.4247, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 25.353195190429688, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.5092, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 18.94191551208496, + "learning_rate": 1.969387755102041e-05, + "loss": 2.4335, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 23.60140037536621, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.544, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 24.298965454101562, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.4987, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.745506286621094, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.4985, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 22.54330062866211, + "learning_rate": 1.948979591836735e-05, + "loss": 2.6892, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 21.46229362487793, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.3998, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 20.54530143737793, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.4244, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 18.8839111328125, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.3911, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.924652099609375, + "learning_rate": 1.928571428571429e-05, + "loss": 2.3588, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.996627807617188, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.3727, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 18.584613800048828, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2974, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 14.309200286865234, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.4843, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.074164390563965, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.4043, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 13.610542297363281, + "learning_rate": 1.903061224489796e-05, + "loss": 2.3762, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 15.666613578796387, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.3249, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 14.475164413452148, + "learning_rate": 1.892857142857143e-05, + "loss": 2.3317, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 16.231687545776367, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.5064, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 16.8968563079834, + "learning_rate": 1.88265306122449e-05, + "loss": 2.3932, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 17.74305534362793, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.3329, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 16.41620445251465, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.3982, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 17.965959548950195, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.4227, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 19.92589569091797, + "learning_rate": 1.862244897959184e-05, + "loss": 2.5255, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 20.62932586669922, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.1816, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 18.360614776611328, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.2827, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 19.199546813964844, + "learning_rate": 1.8469387755102043e-05, + "loss": 2.1498, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 22.727521896362305, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.3811, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 19.80649757385254, + "learning_rate": 1.836734693877551e-05, + "loss": 2.2342, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.24563217163086, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.2287, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 25.384042739868164, + "learning_rate": 1.826530612244898e-05, + "loss": 2.1259, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 23.417089462280273, + "learning_rate": 1.8214285714285715e-05, + "loss": 2.0858, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 27.639497756958008, + "learning_rate": 1.816326530612245e-05, + "loss": 2.2243, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 27.390850067138672, + "learning_rate": 1.8112244897959187e-05, + "loss": 2.1314, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 27.956937789916992, + "learning_rate": 1.806122448979592e-05, + "loss": 2.1755, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 32.09632873535156, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.2365, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 33.84647750854492, + "learning_rate": 1.795918367346939e-05, + "loss": 2.1671, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 32.027130126953125, + "learning_rate": 1.7908163265306123e-05, + "loss": 2.09, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 35.423587799072266, + "learning_rate": 1.785714285714286e-05, + "loss": 2.2479, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 31.041240692138672, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9687, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 28.790103912353516, + "learning_rate": 1.7755102040816327e-05, + "loss": 2.1428, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.089313507080078, + "learning_rate": 1.7704081632653062e-05, + "loss": 2.0673, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 26.493867874145508, + "learning_rate": 1.7653061224489798e-05, + "loss": 2.0814, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 19.993173599243164, + "learning_rate": 1.760204081632653e-05, + "loss": 2.005, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 21.89765167236328, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.2262, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 23.22844123840332, + "learning_rate": 1.7500000000000002e-05, + "loss": 2.0208, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 15.864526748657227, + "learning_rate": 1.7448979591836738e-05, + "loss": 2.0153, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 21.451187133789062, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.1386, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 18.089811325073242, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.9517, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 24.029157638549805, + "learning_rate": 1.729591836734694e-05, + "loss": 1.9719, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 18.722776412963867, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0623, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 20.211933135986328, + "learning_rate": 1.719387755102041e-05, + "loss": 2.0081, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 17.61188507080078, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.8484, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 20.118955612182617, + "learning_rate": 1.7091836734693878e-05, + "loss": 2.0799, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 17.271841049194336, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.9832, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 19.521392822265625, + "learning_rate": 1.698979591836735e-05, + "loss": 1.9129, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 22.660900115966797, + "learning_rate": 1.6938775510204085e-05, + "loss": 2.118, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 17.332427978515625, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.9632, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 22.42765998840332, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.954, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 23.6208553314209, + "learning_rate": 1.678571428571429e-05, + "loss": 1.9917, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 19.78505516052246, + "learning_rate": 1.673469387755102e-05, + "loss": 1.7964, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 19.453041076660156, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.9587, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 24.731407165527344, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.9945, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 20.977611541748047, + "learning_rate": 1.6581632653061225e-05, + "loss": 2.0617, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 22.959585189819336, + "learning_rate": 1.653061224489796e-05, + "loss": 1.98, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 21.952653884887695, + "learning_rate": 1.6479591836734696e-05, + "loss": 2.1094, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 22.320383071899414, + "learning_rate": 1.642857142857143e-05, + "loss": 1.8418, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 24.375411987304688, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.8428, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 19.64323616027832, + "learning_rate": 1.63265306122449e-05, + "loss": 1.9194, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 22.459064483642578, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.6649, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 36.789764404296875, + "learning_rate": 1.6224489795918368e-05, + "loss": 2.0131, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 22.109119415283203, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.9603, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 19.196834564208984, + "learning_rate": 1.612244897959184e-05, + "loss": 2.0538, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 26.870800018310547, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.9168, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 35.190696716308594, + "learning_rate": 1.6020408163265308e-05, + "loss": 2.0149, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 19.963472366333008, + "learning_rate": 1.596938775510204e-05, + "loss": 1.7871, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 20.292407989501953, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.944, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 20.55329132080078, + "learning_rate": 1.586734693877551e-05, + "loss": 2.0175, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 17.27350616455078, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.9308, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 22.471134185791016, + "learning_rate": 1.576530612244898e-05, + "loss": 1.9221, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 25.098316192626953, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.9359, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 25.125213623046875, + "learning_rate": 1.566326530612245e-05, + "loss": 2.0087, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 20.038599014282227, + "learning_rate": 1.5612244897959187e-05, + "loss": 2.0939, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 19.016841888427734, + "learning_rate": 1.556122448979592e-05, + "loss": 2.0183, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 21.97820472717285, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.8239, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 25.578901290893555, + "learning_rate": 1.545918367346939e-05, + "loss": 1.9388, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 23.74614143371582, + "learning_rate": 1.5408163265306123e-05, + "loss": 2.0492, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 22.203304290771484, + "learning_rate": 1.535714285714286e-05, + "loss": 1.941, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 21.39324188232422, + "learning_rate": 1.530612244897959e-05, + "loss": 1.9042, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 18.99315643310547, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.88, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 24.22341537475586, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.8147, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.8966256380081177, + "eval_runtime": 6.9787, + "eval_samples_per_second": 101.022, + "eval_steps_per_second": 50.583, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 18.296152114868164, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.8605, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 26.404766082763672, + "learning_rate": 1.510204081632653e-05, + "loss": 2.1195, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 19.187122344970703, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.9284, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 20.79934310913086, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.725, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 23.833288192749023, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.9215, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 22.301727294921875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.8728, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 23.685596466064453, + "learning_rate": 1.4846938775510204e-05, + "loss": 2.0482, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 18.969186782836914, + "learning_rate": 1.479591836734694e-05, + "loss": 1.8724, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 23.994483947753906, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.9542, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 16.84621238708496, + "learning_rate": 1.469387755102041e-05, + "loss": 1.9703, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 23.411087036132812, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.9836, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 29.55487632751465, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.9124, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 32.28921127319336, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.8566, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 24.007558822631836, + "learning_rate": 1.448979591836735e-05, + "loss": 1.8296, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 26.753524780273438, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.7181, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 22.49270248413086, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.8741, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 28.006656646728516, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.9151, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 17.606775283813477, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.9654, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 29.94802474975586, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.8849, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 28.27743148803711, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.8006, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 19.11652183532715, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.8539, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 24.255807876586914, + "learning_rate": 1.4081632653061225e-05, + "loss": 2.0162, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 22.508352279663086, + "learning_rate": 1.403061224489796e-05, + "loss": 1.858, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 27.028772354125977, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.8175, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 22.697704315185547, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.9789, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 31.604068756103516, + "learning_rate": 1.3877551020408165e-05, + "loss": 2.0039, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 27.71053695678711, + "learning_rate": 1.38265306122449e-05, + "loss": 1.9867, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 17.37586784362793, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.6931, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 20.28536605834961, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.9199, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 29.4377384185791, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.9322, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 17.703046798706055, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.8842, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 30.14008140563965, + "learning_rate": 1.3571428571428574e-05, + "loss": 2.1228, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 29.657262802124023, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.8929, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 18.243854522705078, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.8811, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 33.22247314453125, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.9814, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 26.413856506347656, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.9329, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 20.56089210510254, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.8944, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 19.480737686157227, + "learning_rate": 1.326530612244898e-05, + "loss": 1.9221, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 22.788074493408203, + "learning_rate": 1.3214285714285716e-05, + "loss": 2.0445, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 20.722291946411133, + "learning_rate": 1.316326530612245e-05, + "loss": 1.9998, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 25.190189361572266, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.8076, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 23.203886032104492, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.7821, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 25.32374382019043, + "learning_rate": 1.3010204081632653e-05, + "loss": 2.0356, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 28.798864364624023, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.8202, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 24.93810272216797, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.9237, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 36.78353500366211, + "learning_rate": 1.2857142857142859e-05, + "loss": 2.0019, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 28.510663986206055, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.9268, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 38.19087219238281, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.9366, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 20.796728134155273, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.8731, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 23.036758422851562, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.8835, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 27.058195114135742, + "learning_rate": 1.260204081632653e-05, + "loss": 1.8013, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 25.390460968017578, + "learning_rate": 1.2551020408163267e-05, + "loss": 2.0623, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 27.993654251098633, + "learning_rate": 1.25e-05, + "loss": 1.6895, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 24.15807342529297, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.9799, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 24.369815826416016, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.9687, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 24.572988510131836, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.8607, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 20.491390228271484, + "learning_rate": 1.229591836734694e-05, + "loss": 2.0677, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 25.128101348876953, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.9233, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 18.843276977539062, + "learning_rate": 1.219387755102041e-05, + "loss": 1.781, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 24.99994659423828, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.962, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 20.679218292236328, + "learning_rate": 1.2091836734693878e-05, + "loss": 2.0055, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 26.00550651550293, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.9761, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 33.80900192260742, + "learning_rate": 1.1989795918367348e-05, + "loss": 2.0502, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 25.639009475708008, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.9088, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 17.48627471923828, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.9359, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 23.16074562072754, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.8647, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 25.39946174621582, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.8523, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 25.8050537109375, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.8403, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 20.019033432006836, + "learning_rate": 1.1683673469387755e-05, + "loss": 2.0023, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 26.194847106933594, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.9429, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 21.064212799072266, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.8302, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 21.876129150390625, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.8881, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 33.61103439331055, + "learning_rate": 1.1479591836734697e-05, + "loss": 2.0497, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 27.204744338989258, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.8431, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 21.605751037597656, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.9149, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 30.307472229003906, + "learning_rate": 1.1326530612244899e-05, + "loss": 2.0313, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 23.69244384765625, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.8676, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 25.619901657104492, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.7905, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 28.0296573638916, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.8567, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 36.4359130859375, + "learning_rate": 1.1122448979591838e-05, + "loss": 2.115, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 26.91726303100586, + "learning_rate": 1.1071428571428572e-05, + "loss": 2.0642, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 23.085880279541016, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.9109, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 27.870641708374023, + "learning_rate": 1.096938775510204e-05, + "loss": 1.8999, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 32.0672607421875, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.904, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 28.879365921020508, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.7159, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 27.592771530151367, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.8561, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 27.412763595581055, + "learning_rate": 1.076530612244898e-05, + "loss": 1.9282, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 30.12356185913086, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.8726, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 39.9027214050293, + "learning_rate": 1.066326530612245e-05, + "loss": 1.7551, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 30.483945846557617, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.7643, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 26.00415802001953, + "learning_rate": 1.0561224489795918e-05, + "loss": 2.0552, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 23.03282356262207, + "learning_rate": 1.0510204081632654e-05, + "loss": 2.0052, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 33.653221130371094, + "learning_rate": 1.045918367346939e-05, + "loss": 1.7367, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 39.59351348876953, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.9726, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.77714920043945, + "learning_rate": 1.0357142857142859e-05, + "loss": 2.0906, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 33.194549560546875, + "learning_rate": 1.0306122448979591e-05, + "loss": 2.0742, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 25.10793685913086, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.9204, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 40.048404693603516, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.7775, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 26.085933685302734, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.9459, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 18.375, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.9536, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.8626214265823364, + "eval_runtime": 6.6508, + "eval_samples_per_second": 106.002, + "eval_steps_per_second": 53.076, + "step": 200 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2915603678969856.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-200/training_args.bin b/checkpoints/checkpoint-200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..866f8d7e39db21daf30a11de7c14a3ccb8f2e9aa --- /dev/null +++ b/checkpoints/checkpoint-200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:591380c2089627b1aa728bfd52abdb50afeaafc7a7f48353dede4e443fc370d0 +size 5969 diff --git a/checkpoints/checkpoint-210/README.md b/checkpoints/checkpoint-210/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b974a935220f493e52a3da346206a652479b4735 --- /dev/null +++ b/checkpoints/checkpoint-210/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.2-1B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-210/adapter_config.json b/checkpoints/checkpoint-210/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a0d31afa2216e2d6d2237fc15eef0c8c03ca674 --- /dev/null +++ b/checkpoints/checkpoint-210/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.2-1B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-210/adapter_model.safetensors b/checkpoints/checkpoint-210/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..aa324227f3cfb193102d7984ca5ba7ac6a63e034 --- /dev/null +++ b/checkpoints/checkpoint-210/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1bfbfaa1414c4589f10811cf4c6c109e09208cfad985fcba261843ccf4d11412 +size 41248 diff --git a/checkpoints/checkpoint-210/chat_template.jinja b/checkpoints/checkpoint-210/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..1bad6a0f648dccdbec523ca79ba90fbcfc806af0 --- /dev/null +++ b/checkpoints/checkpoint-210/chat_template.jinja @@ -0,0 +1,93 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- if strftime_now is defined %} + {%- set date_string = strftime_now("%d %b %Y") %} + {%- else %} + {%- set date_string = "26 Jul 2024" %} + {%- endif %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {{- "<|eot_id|>" }} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-210/optimizer.pt b/checkpoints/checkpoint-210/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c16939b077de076a35b8fc1fcc729ec86cfd0631 --- /dev/null +++ b/checkpoints/checkpoint-210/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6c90c000eaa0987c21943848ee5288ca0dbc12ad46bd136e00faa04caeb77f0 +size 38889 diff --git a/checkpoints/checkpoint-210/rng_state.pth b/checkpoints/checkpoint-210/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..27efe79200462dc7ec248017016a4968efc39ca1 --- /dev/null +++ b/checkpoints/checkpoint-210/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4684d51fc75f779aa7923c633e4c75a716ac73280dc8e9335cb1439e364bde19 +size 14581 diff --git a/checkpoints/checkpoint-210/scheduler.pt b/checkpoints/checkpoint-210/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6f39a5795e787262dd7b4edaad5fcf643816704a --- /dev/null +++ b/checkpoints/checkpoint-210/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:279452e1128e4d9eddb9319134b9845f5fcdf0aea9147ef618f84ff3f22cc933 +size 1465 diff --git a/checkpoints/checkpoint-210/special_tokens_map.json b/checkpoints/checkpoint-210/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-210/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-210/tokenizer.json b/checkpoints/checkpoint-210/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-210/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-210/tokenizer_config.json b/checkpoints/checkpoint-210/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-210/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-210/trainer_state.json b/checkpoints/checkpoint-210/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1442392da955e94d9595e4e112c68f56e5a6fb2f --- /dev/null +++ b/checkpoints/checkpoint-210/trainer_state.json @@ -0,0 +1,1520 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.5296343001261034, + "eval_steps": 100, + "global_step": 210, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 25.350475311279297, + "learning_rate": 0.0, + "loss": 2.5568, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 24.538068771362305, + "learning_rate": 4.000000000000001e-06, + "loss": 2.7748, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 23.780784606933594, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5911, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 24.780380249023438, + "learning_rate": 1.2e-05, + "loss": 2.8427, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 22.699949264526367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.709, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 22.106008529663086, + "learning_rate": 2e-05, + "loss": 2.5854, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 22.497045516967773, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.5427, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 27.103275299072266, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.6599, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 21.081985473632812, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.5145, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 25.964981079101562, + "learning_rate": 1.979591836734694e-05, + "loss": 2.4247, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 25.353195190429688, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.5092, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 18.94191551208496, + "learning_rate": 1.969387755102041e-05, + "loss": 2.4335, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 23.60140037536621, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.544, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 24.298965454101562, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.4987, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.745506286621094, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.4985, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 22.54330062866211, + "learning_rate": 1.948979591836735e-05, + "loss": 2.6892, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 21.46229362487793, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.3998, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 20.54530143737793, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.4244, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 18.8839111328125, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.3911, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.924652099609375, + "learning_rate": 1.928571428571429e-05, + "loss": 2.3588, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.996627807617188, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.3727, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 18.584613800048828, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2974, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 14.309200286865234, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.4843, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.074164390563965, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.4043, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 13.610542297363281, + "learning_rate": 1.903061224489796e-05, + "loss": 2.3762, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 15.666613578796387, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.3249, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 14.475164413452148, + "learning_rate": 1.892857142857143e-05, + "loss": 2.3317, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 16.231687545776367, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.5064, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 16.8968563079834, + "learning_rate": 1.88265306122449e-05, + "loss": 2.3932, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 17.74305534362793, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.3329, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 16.41620445251465, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.3982, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 17.965959548950195, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.4227, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 19.92589569091797, + "learning_rate": 1.862244897959184e-05, + "loss": 2.5255, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 20.62932586669922, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.1816, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 18.360614776611328, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.2827, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 19.199546813964844, + "learning_rate": 1.8469387755102043e-05, + "loss": 2.1498, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 22.727521896362305, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.3811, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 19.80649757385254, + "learning_rate": 1.836734693877551e-05, + "loss": 2.2342, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.24563217163086, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.2287, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 25.384042739868164, + "learning_rate": 1.826530612244898e-05, + "loss": 2.1259, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 23.417089462280273, + "learning_rate": 1.8214285714285715e-05, + "loss": 2.0858, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 27.639497756958008, + "learning_rate": 1.816326530612245e-05, + "loss": 2.2243, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 27.390850067138672, + "learning_rate": 1.8112244897959187e-05, + "loss": 2.1314, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 27.956937789916992, + "learning_rate": 1.806122448979592e-05, + "loss": 2.1755, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 32.09632873535156, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.2365, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 33.84647750854492, + "learning_rate": 1.795918367346939e-05, + "loss": 2.1671, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 32.027130126953125, + "learning_rate": 1.7908163265306123e-05, + "loss": 2.09, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 35.423587799072266, + "learning_rate": 1.785714285714286e-05, + "loss": 2.2479, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 31.041240692138672, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9687, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 28.790103912353516, + "learning_rate": 1.7755102040816327e-05, + "loss": 2.1428, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.089313507080078, + "learning_rate": 1.7704081632653062e-05, + "loss": 2.0673, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 26.493867874145508, + "learning_rate": 1.7653061224489798e-05, + "loss": 2.0814, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 19.993173599243164, + "learning_rate": 1.760204081632653e-05, + "loss": 2.005, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 21.89765167236328, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.2262, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 23.22844123840332, + "learning_rate": 1.7500000000000002e-05, + "loss": 2.0208, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 15.864526748657227, + "learning_rate": 1.7448979591836738e-05, + "loss": 2.0153, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 21.451187133789062, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.1386, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 18.089811325073242, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.9517, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 24.029157638549805, + "learning_rate": 1.729591836734694e-05, + "loss": 1.9719, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 18.722776412963867, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0623, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 20.211933135986328, + "learning_rate": 1.719387755102041e-05, + "loss": 2.0081, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 17.61188507080078, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.8484, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 20.118955612182617, + "learning_rate": 1.7091836734693878e-05, + "loss": 2.0799, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 17.271841049194336, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.9832, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 19.521392822265625, + "learning_rate": 1.698979591836735e-05, + "loss": 1.9129, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 22.660900115966797, + "learning_rate": 1.6938775510204085e-05, + "loss": 2.118, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 17.332427978515625, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.9632, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 22.42765998840332, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.954, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 23.6208553314209, + "learning_rate": 1.678571428571429e-05, + "loss": 1.9917, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 19.78505516052246, + "learning_rate": 1.673469387755102e-05, + "loss": 1.7964, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 19.453041076660156, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.9587, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 24.731407165527344, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.9945, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 20.977611541748047, + "learning_rate": 1.6581632653061225e-05, + "loss": 2.0617, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 22.959585189819336, + "learning_rate": 1.653061224489796e-05, + "loss": 1.98, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 21.952653884887695, + "learning_rate": 1.6479591836734696e-05, + "loss": 2.1094, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 22.320383071899414, + "learning_rate": 1.642857142857143e-05, + "loss": 1.8418, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 24.375411987304688, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.8428, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 19.64323616027832, + "learning_rate": 1.63265306122449e-05, + "loss": 1.9194, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 22.459064483642578, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.6649, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 36.789764404296875, + "learning_rate": 1.6224489795918368e-05, + "loss": 2.0131, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 22.109119415283203, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.9603, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 19.196834564208984, + "learning_rate": 1.612244897959184e-05, + "loss": 2.0538, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 26.870800018310547, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.9168, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 35.190696716308594, + "learning_rate": 1.6020408163265308e-05, + "loss": 2.0149, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 19.963472366333008, + "learning_rate": 1.596938775510204e-05, + "loss": 1.7871, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 20.292407989501953, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.944, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 20.55329132080078, + "learning_rate": 1.586734693877551e-05, + "loss": 2.0175, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 17.27350616455078, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.9308, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 22.471134185791016, + "learning_rate": 1.576530612244898e-05, + "loss": 1.9221, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 25.098316192626953, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.9359, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 25.125213623046875, + "learning_rate": 1.566326530612245e-05, + "loss": 2.0087, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 20.038599014282227, + "learning_rate": 1.5612244897959187e-05, + "loss": 2.0939, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 19.016841888427734, + "learning_rate": 1.556122448979592e-05, + "loss": 2.0183, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 21.97820472717285, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.8239, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 25.578901290893555, + "learning_rate": 1.545918367346939e-05, + "loss": 1.9388, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 23.74614143371582, + "learning_rate": 1.5408163265306123e-05, + "loss": 2.0492, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 22.203304290771484, + "learning_rate": 1.535714285714286e-05, + "loss": 1.941, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 21.39324188232422, + "learning_rate": 1.530612244897959e-05, + "loss": 1.9042, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 18.99315643310547, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.88, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 24.22341537475586, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.8147, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.8966256380081177, + "eval_runtime": 6.9787, + "eval_samples_per_second": 101.022, + "eval_steps_per_second": 50.583, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 18.296152114868164, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.8605, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 26.404766082763672, + "learning_rate": 1.510204081632653e-05, + "loss": 2.1195, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 19.187122344970703, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.9284, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 20.79934310913086, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.725, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 23.833288192749023, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.9215, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 22.301727294921875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.8728, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 23.685596466064453, + "learning_rate": 1.4846938775510204e-05, + "loss": 2.0482, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 18.969186782836914, + "learning_rate": 1.479591836734694e-05, + "loss": 1.8724, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 23.994483947753906, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.9542, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 16.84621238708496, + "learning_rate": 1.469387755102041e-05, + "loss": 1.9703, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 23.411087036132812, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.9836, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 29.55487632751465, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.9124, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 32.28921127319336, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.8566, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 24.007558822631836, + "learning_rate": 1.448979591836735e-05, + "loss": 1.8296, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 26.753524780273438, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.7181, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 22.49270248413086, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.8741, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 28.006656646728516, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.9151, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 17.606775283813477, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.9654, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 29.94802474975586, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.8849, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 28.27743148803711, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.8006, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 19.11652183532715, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.8539, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 24.255807876586914, + "learning_rate": 1.4081632653061225e-05, + "loss": 2.0162, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 22.508352279663086, + "learning_rate": 1.403061224489796e-05, + "loss": 1.858, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 27.028772354125977, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.8175, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 22.697704315185547, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.9789, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 31.604068756103516, + "learning_rate": 1.3877551020408165e-05, + "loss": 2.0039, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 27.71053695678711, + "learning_rate": 1.38265306122449e-05, + "loss": 1.9867, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 17.37586784362793, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.6931, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 20.28536605834961, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.9199, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 29.4377384185791, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.9322, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 17.703046798706055, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.8842, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 30.14008140563965, + "learning_rate": 1.3571428571428574e-05, + "loss": 2.1228, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 29.657262802124023, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.8929, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 18.243854522705078, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.8811, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 33.22247314453125, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.9814, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 26.413856506347656, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.9329, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 20.56089210510254, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.8944, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 19.480737686157227, + "learning_rate": 1.326530612244898e-05, + "loss": 1.9221, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 22.788074493408203, + "learning_rate": 1.3214285714285716e-05, + "loss": 2.0445, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 20.722291946411133, + "learning_rate": 1.316326530612245e-05, + "loss": 1.9998, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 25.190189361572266, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.8076, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 23.203886032104492, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.7821, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 25.32374382019043, + "learning_rate": 1.3010204081632653e-05, + "loss": 2.0356, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 28.798864364624023, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.8202, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 24.93810272216797, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.9237, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 36.78353500366211, + "learning_rate": 1.2857142857142859e-05, + "loss": 2.0019, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 28.510663986206055, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.9268, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 38.19087219238281, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.9366, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 20.796728134155273, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.8731, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 23.036758422851562, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.8835, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 27.058195114135742, + "learning_rate": 1.260204081632653e-05, + "loss": 1.8013, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 25.390460968017578, + "learning_rate": 1.2551020408163267e-05, + "loss": 2.0623, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 27.993654251098633, + "learning_rate": 1.25e-05, + "loss": 1.6895, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 24.15807342529297, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.9799, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 24.369815826416016, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.9687, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 24.572988510131836, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.8607, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 20.491390228271484, + "learning_rate": 1.229591836734694e-05, + "loss": 2.0677, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 25.128101348876953, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.9233, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 18.843276977539062, + "learning_rate": 1.219387755102041e-05, + "loss": 1.781, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 24.99994659423828, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.962, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 20.679218292236328, + "learning_rate": 1.2091836734693878e-05, + "loss": 2.0055, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 26.00550651550293, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.9761, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 33.80900192260742, + "learning_rate": 1.1989795918367348e-05, + "loss": 2.0502, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 25.639009475708008, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.9088, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 17.48627471923828, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.9359, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 23.16074562072754, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.8647, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 25.39946174621582, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.8523, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 25.8050537109375, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.8403, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 20.019033432006836, + "learning_rate": 1.1683673469387755e-05, + "loss": 2.0023, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 26.194847106933594, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.9429, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 21.064212799072266, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.8302, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 21.876129150390625, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.8881, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 33.61103439331055, + "learning_rate": 1.1479591836734697e-05, + "loss": 2.0497, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 27.204744338989258, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.8431, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 21.605751037597656, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.9149, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 30.307472229003906, + "learning_rate": 1.1326530612244899e-05, + "loss": 2.0313, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 23.69244384765625, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.8676, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 25.619901657104492, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.7905, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 28.0296573638916, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.8567, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 36.4359130859375, + "learning_rate": 1.1122448979591838e-05, + "loss": 2.115, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 26.91726303100586, + "learning_rate": 1.1071428571428572e-05, + "loss": 2.0642, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 23.085880279541016, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.9109, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 27.870641708374023, + "learning_rate": 1.096938775510204e-05, + "loss": 1.8999, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 32.0672607421875, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.904, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 28.879365921020508, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.7159, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 27.592771530151367, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.8561, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 27.412763595581055, + "learning_rate": 1.076530612244898e-05, + "loss": 1.9282, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 30.12356185913086, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.8726, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 39.9027214050293, + "learning_rate": 1.066326530612245e-05, + "loss": 1.7551, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 30.483945846557617, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.7643, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 26.00415802001953, + "learning_rate": 1.0561224489795918e-05, + "loss": 2.0552, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 23.03282356262207, + "learning_rate": 1.0510204081632654e-05, + "loss": 2.0052, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 33.653221130371094, + "learning_rate": 1.045918367346939e-05, + "loss": 1.7367, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 39.59351348876953, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.9726, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.77714920043945, + "learning_rate": 1.0357142857142859e-05, + "loss": 2.0906, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 33.194549560546875, + "learning_rate": 1.0306122448979591e-05, + "loss": 2.0742, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 25.10793685913086, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.9204, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 40.048404693603516, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.7775, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 26.085933685302734, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.9459, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 18.375, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.9536, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.8626214265823364, + "eval_runtime": 6.6508, + "eval_samples_per_second": 106.002, + "eval_steps_per_second": 53.076, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 33.858341217041016, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.8191, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 22.895992279052734, + "learning_rate": 1e-05, + "loss": 2.0596, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 30.55072593688965, + "learning_rate": 9.948979591836737e-06, + "loss": 1.8904, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 26.542705535888672, + "learning_rate": 9.89795918367347e-06, + "loss": 1.9683, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 39.81034851074219, + "learning_rate": 9.846938775510205e-06, + "loss": 1.9726, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 22.0065860748291, + "learning_rate": 9.795918367346939e-06, + "loss": 2.0575, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 19.012041091918945, + "learning_rate": 9.744897959183674e-06, + "loss": 1.8431, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 39.699974060058594, + "learning_rate": 9.693877551020408e-06, + "loss": 1.8231, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 21.391319274902344, + "learning_rate": 9.642857142857144e-06, + "loss": 1.7939, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 25.8063907623291, + "learning_rate": 9.591836734693878e-06, + "loss": 2.0366, + "step": 210 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3059661960069120.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-210/training_args.bin b/checkpoints/checkpoint-210/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..866f8d7e39db21daf30a11de7c14a3ccb8f2e9aa --- /dev/null +++ b/checkpoints/checkpoint-210/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:591380c2089627b1aa728bfd52abdb50afeaafc7a7f48353dede4e443fc370d0 +size 5969 diff --git a/checkpoints/checkpoint-220/README.md b/checkpoints/checkpoint-220/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b974a935220f493e52a3da346206a652479b4735 --- /dev/null +++ b/checkpoints/checkpoint-220/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.2-1B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-220/adapter_config.json b/checkpoints/checkpoint-220/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a0d31afa2216e2d6d2237fc15eef0c8c03ca674 --- /dev/null +++ b/checkpoints/checkpoint-220/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.2-1B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-220/adapter_model.safetensors b/checkpoints/checkpoint-220/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..015a6a2c355678c0146a0a18b51107bb6b122373 --- /dev/null +++ b/checkpoints/checkpoint-220/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cce5661044c3eb3ce3632a422895a8604b37b412ec14aefdb544974e69a07b98 +size 41248 diff --git a/checkpoints/checkpoint-220/chat_template.jinja b/checkpoints/checkpoint-220/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..1bad6a0f648dccdbec523ca79ba90fbcfc806af0 --- /dev/null +++ b/checkpoints/checkpoint-220/chat_template.jinja @@ -0,0 +1,93 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- if strftime_now is defined %} + {%- set date_string = strftime_now("%d %b %Y") %} + {%- else %} + {%- set date_string = "26 Jul 2024" %} + {%- endif %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {{- "<|eot_id|>" }} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-220/optimizer.pt b/checkpoints/checkpoint-220/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..dab8bb57258493025b25b8c340abf045fea4b508 --- /dev/null +++ b/checkpoints/checkpoint-220/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37b84c372d47c45716f517693e9016b8ff453687c21f806bc2b4f711125584d9 +size 38889 diff --git a/checkpoints/checkpoint-220/rng_state.pth b/checkpoints/checkpoint-220/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..27efe79200462dc7ec248017016a4968efc39ca1 --- /dev/null +++ b/checkpoints/checkpoint-220/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4684d51fc75f779aa7923c633e4c75a716ac73280dc8e9335cb1439e364bde19 +size 14581 diff --git a/checkpoints/checkpoint-220/scheduler.pt b/checkpoints/checkpoint-220/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c0e0145a858ffa0aeb0a7e10d491cde2e21957e8 --- /dev/null +++ b/checkpoints/checkpoint-220/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33e842741dab5832f4a8b220e4e011f56188ace922943304e25b637be69a9edd +size 1465 diff --git a/checkpoints/checkpoint-220/special_tokens_map.json b/checkpoints/checkpoint-220/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-220/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-220/tokenizer.json b/checkpoints/checkpoint-220/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-220/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-220/tokenizer_config.json b/checkpoints/checkpoint-220/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-220/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-220/trainer_state.json b/checkpoints/checkpoint-220/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ea802dbb9998f63c59a9dc4e7df62b444fcb49bf --- /dev/null +++ b/checkpoints/checkpoint-220/trainer_state.json @@ -0,0 +1,1590 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.5548549810844893, + "eval_steps": 100, + "global_step": 220, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 25.350475311279297, + "learning_rate": 0.0, + "loss": 2.5568, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 24.538068771362305, + "learning_rate": 4.000000000000001e-06, + "loss": 2.7748, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 23.780784606933594, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5911, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 24.780380249023438, + "learning_rate": 1.2e-05, + "loss": 2.8427, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 22.699949264526367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.709, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 22.106008529663086, + "learning_rate": 2e-05, + "loss": 2.5854, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 22.497045516967773, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.5427, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 27.103275299072266, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.6599, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 21.081985473632812, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.5145, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 25.964981079101562, + "learning_rate": 1.979591836734694e-05, + "loss": 2.4247, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 25.353195190429688, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.5092, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 18.94191551208496, + "learning_rate": 1.969387755102041e-05, + "loss": 2.4335, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 23.60140037536621, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.544, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 24.298965454101562, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.4987, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.745506286621094, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.4985, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 22.54330062866211, + "learning_rate": 1.948979591836735e-05, + "loss": 2.6892, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 21.46229362487793, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.3998, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 20.54530143737793, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.4244, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 18.8839111328125, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.3911, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.924652099609375, + "learning_rate": 1.928571428571429e-05, + "loss": 2.3588, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.996627807617188, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.3727, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 18.584613800048828, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2974, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 14.309200286865234, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.4843, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.074164390563965, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.4043, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 13.610542297363281, + "learning_rate": 1.903061224489796e-05, + "loss": 2.3762, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 15.666613578796387, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.3249, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 14.475164413452148, + "learning_rate": 1.892857142857143e-05, + "loss": 2.3317, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 16.231687545776367, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.5064, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 16.8968563079834, + "learning_rate": 1.88265306122449e-05, + "loss": 2.3932, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 17.74305534362793, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.3329, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 16.41620445251465, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.3982, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 17.965959548950195, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.4227, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 19.92589569091797, + "learning_rate": 1.862244897959184e-05, + "loss": 2.5255, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 20.62932586669922, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.1816, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 18.360614776611328, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.2827, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 19.199546813964844, + "learning_rate": 1.8469387755102043e-05, + "loss": 2.1498, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 22.727521896362305, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.3811, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 19.80649757385254, + "learning_rate": 1.836734693877551e-05, + "loss": 2.2342, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.24563217163086, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.2287, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 25.384042739868164, + "learning_rate": 1.826530612244898e-05, + "loss": 2.1259, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 23.417089462280273, + "learning_rate": 1.8214285714285715e-05, + "loss": 2.0858, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 27.639497756958008, + "learning_rate": 1.816326530612245e-05, + "loss": 2.2243, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 27.390850067138672, + "learning_rate": 1.8112244897959187e-05, + "loss": 2.1314, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 27.956937789916992, + "learning_rate": 1.806122448979592e-05, + "loss": 2.1755, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 32.09632873535156, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.2365, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 33.84647750854492, + "learning_rate": 1.795918367346939e-05, + "loss": 2.1671, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 32.027130126953125, + "learning_rate": 1.7908163265306123e-05, + "loss": 2.09, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 35.423587799072266, + "learning_rate": 1.785714285714286e-05, + "loss": 2.2479, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 31.041240692138672, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9687, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 28.790103912353516, + "learning_rate": 1.7755102040816327e-05, + "loss": 2.1428, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.089313507080078, + "learning_rate": 1.7704081632653062e-05, + "loss": 2.0673, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 26.493867874145508, + "learning_rate": 1.7653061224489798e-05, + "loss": 2.0814, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 19.993173599243164, + "learning_rate": 1.760204081632653e-05, + "loss": 2.005, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 21.89765167236328, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.2262, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 23.22844123840332, + "learning_rate": 1.7500000000000002e-05, + "loss": 2.0208, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 15.864526748657227, + "learning_rate": 1.7448979591836738e-05, + "loss": 2.0153, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 21.451187133789062, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.1386, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 18.089811325073242, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.9517, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 24.029157638549805, + "learning_rate": 1.729591836734694e-05, + "loss": 1.9719, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 18.722776412963867, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0623, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 20.211933135986328, + "learning_rate": 1.719387755102041e-05, + "loss": 2.0081, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 17.61188507080078, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.8484, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 20.118955612182617, + "learning_rate": 1.7091836734693878e-05, + "loss": 2.0799, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 17.271841049194336, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.9832, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 19.521392822265625, + "learning_rate": 1.698979591836735e-05, + "loss": 1.9129, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 22.660900115966797, + "learning_rate": 1.6938775510204085e-05, + "loss": 2.118, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 17.332427978515625, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.9632, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 22.42765998840332, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.954, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 23.6208553314209, + "learning_rate": 1.678571428571429e-05, + "loss": 1.9917, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 19.78505516052246, + "learning_rate": 1.673469387755102e-05, + "loss": 1.7964, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 19.453041076660156, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.9587, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 24.731407165527344, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.9945, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 20.977611541748047, + "learning_rate": 1.6581632653061225e-05, + "loss": 2.0617, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 22.959585189819336, + "learning_rate": 1.653061224489796e-05, + "loss": 1.98, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 21.952653884887695, + "learning_rate": 1.6479591836734696e-05, + "loss": 2.1094, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 22.320383071899414, + "learning_rate": 1.642857142857143e-05, + "loss": 1.8418, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 24.375411987304688, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.8428, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 19.64323616027832, + "learning_rate": 1.63265306122449e-05, + "loss": 1.9194, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 22.459064483642578, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.6649, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 36.789764404296875, + "learning_rate": 1.6224489795918368e-05, + "loss": 2.0131, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 22.109119415283203, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.9603, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 19.196834564208984, + "learning_rate": 1.612244897959184e-05, + "loss": 2.0538, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 26.870800018310547, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.9168, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 35.190696716308594, + "learning_rate": 1.6020408163265308e-05, + "loss": 2.0149, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 19.963472366333008, + "learning_rate": 1.596938775510204e-05, + "loss": 1.7871, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 20.292407989501953, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.944, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 20.55329132080078, + "learning_rate": 1.586734693877551e-05, + "loss": 2.0175, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 17.27350616455078, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.9308, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 22.471134185791016, + "learning_rate": 1.576530612244898e-05, + "loss": 1.9221, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 25.098316192626953, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.9359, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 25.125213623046875, + "learning_rate": 1.566326530612245e-05, + "loss": 2.0087, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 20.038599014282227, + "learning_rate": 1.5612244897959187e-05, + "loss": 2.0939, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 19.016841888427734, + "learning_rate": 1.556122448979592e-05, + "loss": 2.0183, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 21.97820472717285, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.8239, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 25.578901290893555, + "learning_rate": 1.545918367346939e-05, + "loss": 1.9388, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 23.74614143371582, + "learning_rate": 1.5408163265306123e-05, + "loss": 2.0492, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 22.203304290771484, + "learning_rate": 1.535714285714286e-05, + "loss": 1.941, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 21.39324188232422, + "learning_rate": 1.530612244897959e-05, + "loss": 1.9042, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 18.99315643310547, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.88, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 24.22341537475586, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.8147, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.8966256380081177, + "eval_runtime": 6.9787, + "eval_samples_per_second": 101.022, + "eval_steps_per_second": 50.583, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 18.296152114868164, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.8605, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 26.404766082763672, + "learning_rate": 1.510204081632653e-05, + "loss": 2.1195, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 19.187122344970703, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.9284, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 20.79934310913086, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.725, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 23.833288192749023, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.9215, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 22.301727294921875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.8728, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 23.685596466064453, + "learning_rate": 1.4846938775510204e-05, + "loss": 2.0482, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 18.969186782836914, + "learning_rate": 1.479591836734694e-05, + "loss": 1.8724, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 23.994483947753906, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.9542, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 16.84621238708496, + "learning_rate": 1.469387755102041e-05, + "loss": 1.9703, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 23.411087036132812, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.9836, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 29.55487632751465, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.9124, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 32.28921127319336, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.8566, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 24.007558822631836, + "learning_rate": 1.448979591836735e-05, + "loss": 1.8296, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 26.753524780273438, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.7181, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 22.49270248413086, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.8741, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 28.006656646728516, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.9151, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 17.606775283813477, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.9654, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 29.94802474975586, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.8849, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 28.27743148803711, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.8006, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 19.11652183532715, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.8539, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 24.255807876586914, + "learning_rate": 1.4081632653061225e-05, + "loss": 2.0162, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 22.508352279663086, + "learning_rate": 1.403061224489796e-05, + "loss": 1.858, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 27.028772354125977, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.8175, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 22.697704315185547, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.9789, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 31.604068756103516, + "learning_rate": 1.3877551020408165e-05, + "loss": 2.0039, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 27.71053695678711, + "learning_rate": 1.38265306122449e-05, + "loss": 1.9867, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 17.37586784362793, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.6931, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 20.28536605834961, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.9199, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 29.4377384185791, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.9322, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 17.703046798706055, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.8842, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 30.14008140563965, + "learning_rate": 1.3571428571428574e-05, + "loss": 2.1228, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 29.657262802124023, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.8929, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 18.243854522705078, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.8811, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 33.22247314453125, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.9814, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 26.413856506347656, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.9329, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 20.56089210510254, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.8944, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 19.480737686157227, + "learning_rate": 1.326530612244898e-05, + "loss": 1.9221, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 22.788074493408203, + "learning_rate": 1.3214285714285716e-05, + "loss": 2.0445, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 20.722291946411133, + "learning_rate": 1.316326530612245e-05, + "loss": 1.9998, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 25.190189361572266, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.8076, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 23.203886032104492, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.7821, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 25.32374382019043, + "learning_rate": 1.3010204081632653e-05, + "loss": 2.0356, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 28.798864364624023, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.8202, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 24.93810272216797, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.9237, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 36.78353500366211, + "learning_rate": 1.2857142857142859e-05, + "loss": 2.0019, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 28.510663986206055, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.9268, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 38.19087219238281, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.9366, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 20.796728134155273, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.8731, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 23.036758422851562, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.8835, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 27.058195114135742, + "learning_rate": 1.260204081632653e-05, + "loss": 1.8013, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 25.390460968017578, + "learning_rate": 1.2551020408163267e-05, + "loss": 2.0623, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 27.993654251098633, + "learning_rate": 1.25e-05, + "loss": 1.6895, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 24.15807342529297, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.9799, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 24.369815826416016, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.9687, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 24.572988510131836, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.8607, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 20.491390228271484, + "learning_rate": 1.229591836734694e-05, + "loss": 2.0677, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 25.128101348876953, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.9233, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 18.843276977539062, + "learning_rate": 1.219387755102041e-05, + "loss": 1.781, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 24.99994659423828, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.962, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 20.679218292236328, + "learning_rate": 1.2091836734693878e-05, + "loss": 2.0055, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 26.00550651550293, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.9761, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 33.80900192260742, + "learning_rate": 1.1989795918367348e-05, + "loss": 2.0502, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 25.639009475708008, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.9088, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 17.48627471923828, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.9359, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 23.16074562072754, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.8647, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 25.39946174621582, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.8523, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 25.8050537109375, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.8403, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 20.019033432006836, + "learning_rate": 1.1683673469387755e-05, + "loss": 2.0023, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 26.194847106933594, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.9429, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 21.064212799072266, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.8302, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 21.876129150390625, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.8881, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 33.61103439331055, + "learning_rate": 1.1479591836734697e-05, + "loss": 2.0497, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 27.204744338989258, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.8431, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 21.605751037597656, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.9149, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 30.307472229003906, + "learning_rate": 1.1326530612244899e-05, + "loss": 2.0313, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 23.69244384765625, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.8676, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 25.619901657104492, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.7905, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 28.0296573638916, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.8567, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 36.4359130859375, + "learning_rate": 1.1122448979591838e-05, + "loss": 2.115, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 26.91726303100586, + "learning_rate": 1.1071428571428572e-05, + "loss": 2.0642, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 23.085880279541016, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.9109, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 27.870641708374023, + "learning_rate": 1.096938775510204e-05, + "loss": 1.8999, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 32.0672607421875, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.904, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 28.879365921020508, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.7159, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 27.592771530151367, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.8561, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 27.412763595581055, + "learning_rate": 1.076530612244898e-05, + "loss": 1.9282, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 30.12356185913086, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.8726, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 39.9027214050293, + "learning_rate": 1.066326530612245e-05, + "loss": 1.7551, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 30.483945846557617, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.7643, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 26.00415802001953, + "learning_rate": 1.0561224489795918e-05, + "loss": 2.0552, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 23.03282356262207, + "learning_rate": 1.0510204081632654e-05, + "loss": 2.0052, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 33.653221130371094, + "learning_rate": 1.045918367346939e-05, + "loss": 1.7367, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 39.59351348876953, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.9726, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.77714920043945, + "learning_rate": 1.0357142857142859e-05, + "loss": 2.0906, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 33.194549560546875, + "learning_rate": 1.0306122448979591e-05, + "loss": 2.0742, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 25.10793685913086, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.9204, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 40.048404693603516, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.7775, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 26.085933685302734, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.9459, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 18.375, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.9536, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.8626214265823364, + "eval_runtime": 6.6508, + "eval_samples_per_second": 106.002, + "eval_steps_per_second": 53.076, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 33.858341217041016, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.8191, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 22.895992279052734, + "learning_rate": 1e-05, + "loss": 2.0596, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 30.55072593688965, + "learning_rate": 9.948979591836737e-06, + "loss": 1.8904, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 26.542705535888672, + "learning_rate": 9.89795918367347e-06, + "loss": 1.9683, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 39.81034851074219, + "learning_rate": 9.846938775510205e-06, + "loss": 1.9726, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 22.0065860748291, + "learning_rate": 9.795918367346939e-06, + "loss": 2.0575, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 19.012041091918945, + "learning_rate": 9.744897959183674e-06, + "loss": 1.8431, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 39.699974060058594, + "learning_rate": 9.693877551020408e-06, + "loss": 1.8231, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 21.391319274902344, + "learning_rate": 9.642857142857144e-06, + "loss": 1.7939, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 25.8063907623291, + "learning_rate": 9.591836734693878e-06, + "loss": 2.0366, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 20.598569869995117, + "learning_rate": 9.540816326530612e-06, + "loss": 1.8323, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 29.391401290893555, + "learning_rate": 9.489795918367348e-06, + "loss": 2.0052, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 24.39499855041504, + "learning_rate": 9.438775510204082e-06, + "loss": 1.8461, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 24.16887092590332, + "learning_rate": 9.387755102040818e-06, + "loss": 1.9404, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 24.577871322631836, + "learning_rate": 9.336734693877552e-06, + "loss": 1.9202, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 26.117361068725586, + "learning_rate": 9.285714285714288e-06, + "loss": 1.921, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 22.586837768554688, + "learning_rate": 9.234693877551022e-06, + "loss": 1.9692, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 18.438722610473633, + "learning_rate": 9.183673469387756e-06, + "loss": 1.9496, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 22.94545555114746, + "learning_rate": 9.13265306122449e-06, + "loss": 1.991, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 28.664562225341797, + "learning_rate": 9.081632653061225e-06, + "loss": 1.8352, + "step": 220 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3206534609338368.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-220/training_args.bin b/checkpoints/checkpoint-220/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..866f8d7e39db21daf30a11de7c14a3ccb8f2e9aa --- /dev/null +++ b/checkpoints/checkpoint-220/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:591380c2089627b1aa728bfd52abdb50afeaafc7a7f48353dede4e443fc370d0 +size 5969 diff --git a/checkpoints/checkpoint-230/README.md b/checkpoints/checkpoint-230/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b974a935220f493e52a3da346206a652479b4735 --- /dev/null +++ b/checkpoints/checkpoint-230/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.2-1B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-230/adapter_config.json b/checkpoints/checkpoint-230/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a0d31afa2216e2d6d2237fc15eef0c8c03ca674 --- /dev/null +++ b/checkpoints/checkpoint-230/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.2-1B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-230/adapter_model.safetensors b/checkpoints/checkpoint-230/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fe3ae3f18c9d6d9883639507e81e522cc0705505 --- /dev/null +++ b/checkpoints/checkpoint-230/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ce9574403a50dae3a1f4ae004ddd6919a1a1d5ef935acaf904c9c674fdac4db +size 41248 diff --git a/checkpoints/checkpoint-230/chat_template.jinja b/checkpoints/checkpoint-230/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..1bad6a0f648dccdbec523ca79ba90fbcfc806af0 --- /dev/null +++ b/checkpoints/checkpoint-230/chat_template.jinja @@ -0,0 +1,93 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- if strftime_now is defined %} + {%- set date_string = strftime_now("%d %b %Y") %} + {%- else %} + {%- set date_string = "26 Jul 2024" %} + {%- endif %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {{- "<|eot_id|>" }} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-230/optimizer.pt b/checkpoints/checkpoint-230/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0f1672b52d6a19ac39416203164946a3e06bb300 --- /dev/null +++ b/checkpoints/checkpoint-230/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:481234553f24b87a6c330cbbd54291e80f2e90c952d8fd01acb1c83428efe370 +size 38889 diff --git a/checkpoints/checkpoint-230/rng_state.pth b/checkpoints/checkpoint-230/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..27efe79200462dc7ec248017016a4968efc39ca1 --- /dev/null +++ b/checkpoints/checkpoint-230/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4684d51fc75f779aa7923c633e4c75a716ac73280dc8e9335cb1439e364bde19 +size 14581 diff --git a/checkpoints/checkpoint-230/scheduler.pt b/checkpoints/checkpoint-230/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..4fcda74b603ccf71e0530ad23732f2ef2df25b56 --- /dev/null +++ b/checkpoints/checkpoint-230/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92ba6253aa1eb868e40fddf00971250b381b6b0d17eeb5eb3e368e34112796bb +size 1465 diff --git a/checkpoints/checkpoint-230/special_tokens_map.json b/checkpoints/checkpoint-230/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-230/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-230/tokenizer.json b/checkpoints/checkpoint-230/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-230/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-230/tokenizer_config.json b/checkpoints/checkpoint-230/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-230/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-230/trainer_state.json b/checkpoints/checkpoint-230/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0f265ed280a302d271faafcd55b4ed60c02d8bbb --- /dev/null +++ b/checkpoints/checkpoint-230/trainer_state.json @@ -0,0 +1,1660 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.5800756620428752, + "eval_steps": 100, + "global_step": 230, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 25.350475311279297, + "learning_rate": 0.0, + "loss": 2.5568, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 24.538068771362305, + "learning_rate": 4.000000000000001e-06, + "loss": 2.7748, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 23.780784606933594, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5911, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 24.780380249023438, + "learning_rate": 1.2e-05, + "loss": 2.8427, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 22.699949264526367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.709, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 22.106008529663086, + "learning_rate": 2e-05, + "loss": 2.5854, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 22.497045516967773, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.5427, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 27.103275299072266, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.6599, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 21.081985473632812, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.5145, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 25.964981079101562, + "learning_rate": 1.979591836734694e-05, + "loss": 2.4247, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 25.353195190429688, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.5092, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 18.94191551208496, + "learning_rate": 1.969387755102041e-05, + "loss": 2.4335, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 23.60140037536621, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.544, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 24.298965454101562, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.4987, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.745506286621094, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.4985, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 22.54330062866211, + "learning_rate": 1.948979591836735e-05, + "loss": 2.6892, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 21.46229362487793, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.3998, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 20.54530143737793, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.4244, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 18.8839111328125, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.3911, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.924652099609375, + "learning_rate": 1.928571428571429e-05, + "loss": 2.3588, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.996627807617188, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.3727, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 18.584613800048828, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2974, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 14.309200286865234, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.4843, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.074164390563965, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.4043, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 13.610542297363281, + "learning_rate": 1.903061224489796e-05, + "loss": 2.3762, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 15.666613578796387, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.3249, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 14.475164413452148, + "learning_rate": 1.892857142857143e-05, + "loss": 2.3317, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 16.231687545776367, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.5064, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 16.8968563079834, + "learning_rate": 1.88265306122449e-05, + "loss": 2.3932, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 17.74305534362793, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.3329, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 16.41620445251465, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.3982, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 17.965959548950195, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.4227, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 19.92589569091797, + "learning_rate": 1.862244897959184e-05, + "loss": 2.5255, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 20.62932586669922, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.1816, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 18.360614776611328, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.2827, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 19.199546813964844, + "learning_rate": 1.8469387755102043e-05, + "loss": 2.1498, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 22.727521896362305, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.3811, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 19.80649757385254, + "learning_rate": 1.836734693877551e-05, + "loss": 2.2342, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.24563217163086, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.2287, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 25.384042739868164, + "learning_rate": 1.826530612244898e-05, + "loss": 2.1259, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 23.417089462280273, + "learning_rate": 1.8214285714285715e-05, + "loss": 2.0858, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 27.639497756958008, + "learning_rate": 1.816326530612245e-05, + "loss": 2.2243, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 27.390850067138672, + "learning_rate": 1.8112244897959187e-05, + "loss": 2.1314, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 27.956937789916992, + "learning_rate": 1.806122448979592e-05, + "loss": 2.1755, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 32.09632873535156, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.2365, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 33.84647750854492, + "learning_rate": 1.795918367346939e-05, + "loss": 2.1671, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 32.027130126953125, + "learning_rate": 1.7908163265306123e-05, + "loss": 2.09, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 35.423587799072266, + "learning_rate": 1.785714285714286e-05, + "loss": 2.2479, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 31.041240692138672, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9687, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 28.790103912353516, + "learning_rate": 1.7755102040816327e-05, + "loss": 2.1428, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.089313507080078, + "learning_rate": 1.7704081632653062e-05, + "loss": 2.0673, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 26.493867874145508, + "learning_rate": 1.7653061224489798e-05, + "loss": 2.0814, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 19.993173599243164, + "learning_rate": 1.760204081632653e-05, + "loss": 2.005, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 21.89765167236328, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.2262, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 23.22844123840332, + "learning_rate": 1.7500000000000002e-05, + "loss": 2.0208, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 15.864526748657227, + "learning_rate": 1.7448979591836738e-05, + "loss": 2.0153, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 21.451187133789062, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.1386, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 18.089811325073242, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.9517, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 24.029157638549805, + "learning_rate": 1.729591836734694e-05, + "loss": 1.9719, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 18.722776412963867, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0623, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 20.211933135986328, + "learning_rate": 1.719387755102041e-05, + "loss": 2.0081, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 17.61188507080078, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.8484, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 20.118955612182617, + "learning_rate": 1.7091836734693878e-05, + "loss": 2.0799, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 17.271841049194336, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.9832, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 19.521392822265625, + "learning_rate": 1.698979591836735e-05, + "loss": 1.9129, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 22.660900115966797, + "learning_rate": 1.6938775510204085e-05, + "loss": 2.118, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 17.332427978515625, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.9632, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 22.42765998840332, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.954, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 23.6208553314209, + "learning_rate": 1.678571428571429e-05, + "loss": 1.9917, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 19.78505516052246, + "learning_rate": 1.673469387755102e-05, + "loss": 1.7964, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 19.453041076660156, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.9587, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 24.731407165527344, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.9945, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 20.977611541748047, + "learning_rate": 1.6581632653061225e-05, + "loss": 2.0617, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 22.959585189819336, + "learning_rate": 1.653061224489796e-05, + "loss": 1.98, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 21.952653884887695, + "learning_rate": 1.6479591836734696e-05, + "loss": 2.1094, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 22.320383071899414, + "learning_rate": 1.642857142857143e-05, + "loss": 1.8418, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 24.375411987304688, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.8428, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 19.64323616027832, + "learning_rate": 1.63265306122449e-05, + "loss": 1.9194, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 22.459064483642578, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.6649, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 36.789764404296875, + "learning_rate": 1.6224489795918368e-05, + "loss": 2.0131, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 22.109119415283203, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.9603, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 19.196834564208984, + "learning_rate": 1.612244897959184e-05, + "loss": 2.0538, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 26.870800018310547, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.9168, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 35.190696716308594, + "learning_rate": 1.6020408163265308e-05, + "loss": 2.0149, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 19.963472366333008, + "learning_rate": 1.596938775510204e-05, + "loss": 1.7871, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 20.292407989501953, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.944, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 20.55329132080078, + "learning_rate": 1.586734693877551e-05, + "loss": 2.0175, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 17.27350616455078, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.9308, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 22.471134185791016, + "learning_rate": 1.576530612244898e-05, + "loss": 1.9221, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 25.098316192626953, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.9359, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 25.125213623046875, + "learning_rate": 1.566326530612245e-05, + "loss": 2.0087, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 20.038599014282227, + "learning_rate": 1.5612244897959187e-05, + "loss": 2.0939, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 19.016841888427734, + "learning_rate": 1.556122448979592e-05, + "loss": 2.0183, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 21.97820472717285, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.8239, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 25.578901290893555, + "learning_rate": 1.545918367346939e-05, + "loss": 1.9388, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 23.74614143371582, + "learning_rate": 1.5408163265306123e-05, + "loss": 2.0492, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 22.203304290771484, + "learning_rate": 1.535714285714286e-05, + "loss": 1.941, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 21.39324188232422, + "learning_rate": 1.530612244897959e-05, + "loss": 1.9042, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 18.99315643310547, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.88, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 24.22341537475586, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.8147, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.8966256380081177, + "eval_runtime": 6.9787, + "eval_samples_per_second": 101.022, + "eval_steps_per_second": 50.583, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 18.296152114868164, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.8605, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 26.404766082763672, + "learning_rate": 1.510204081632653e-05, + "loss": 2.1195, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 19.187122344970703, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.9284, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 20.79934310913086, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.725, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 23.833288192749023, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.9215, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 22.301727294921875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.8728, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 23.685596466064453, + "learning_rate": 1.4846938775510204e-05, + "loss": 2.0482, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 18.969186782836914, + "learning_rate": 1.479591836734694e-05, + "loss": 1.8724, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 23.994483947753906, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.9542, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 16.84621238708496, + "learning_rate": 1.469387755102041e-05, + "loss": 1.9703, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 23.411087036132812, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.9836, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 29.55487632751465, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.9124, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 32.28921127319336, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.8566, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 24.007558822631836, + "learning_rate": 1.448979591836735e-05, + "loss": 1.8296, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 26.753524780273438, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.7181, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 22.49270248413086, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.8741, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 28.006656646728516, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.9151, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 17.606775283813477, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.9654, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 29.94802474975586, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.8849, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 28.27743148803711, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.8006, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 19.11652183532715, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.8539, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 24.255807876586914, + "learning_rate": 1.4081632653061225e-05, + "loss": 2.0162, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 22.508352279663086, + "learning_rate": 1.403061224489796e-05, + "loss": 1.858, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 27.028772354125977, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.8175, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 22.697704315185547, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.9789, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 31.604068756103516, + "learning_rate": 1.3877551020408165e-05, + "loss": 2.0039, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 27.71053695678711, + "learning_rate": 1.38265306122449e-05, + "loss": 1.9867, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 17.37586784362793, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.6931, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 20.28536605834961, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.9199, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 29.4377384185791, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.9322, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 17.703046798706055, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.8842, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 30.14008140563965, + "learning_rate": 1.3571428571428574e-05, + "loss": 2.1228, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 29.657262802124023, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.8929, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 18.243854522705078, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.8811, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 33.22247314453125, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.9814, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 26.413856506347656, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.9329, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 20.56089210510254, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.8944, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 19.480737686157227, + "learning_rate": 1.326530612244898e-05, + "loss": 1.9221, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 22.788074493408203, + "learning_rate": 1.3214285714285716e-05, + "loss": 2.0445, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 20.722291946411133, + "learning_rate": 1.316326530612245e-05, + "loss": 1.9998, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 25.190189361572266, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.8076, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 23.203886032104492, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.7821, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 25.32374382019043, + "learning_rate": 1.3010204081632653e-05, + "loss": 2.0356, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 28.798864364624023, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.8202, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 24.93810272216797, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.9237, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 36.78353500366211, + "learning_rate": 1.2857142857142859e-05, + "loss": 2.0019, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 28.510663986206055, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.9268, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 38.19087219238281, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.9366, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 20.796728134155273, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.8731, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 23.036758422851562, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.8835, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 27.058195114135742, + "learning_rate": 1.260204081632653e-05, + "loss": 1.8013, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 25.390460968017578, + "learning_rate": 1.2551020408163267e-05, + "loss": 2.0623, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 27.993654251098633, + "learning_rate": 1.25e-05, + "loss": 1.6895, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 24.15807342529297, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.9799, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 24.369815826416016, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.9687, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 24.572988510131836, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.8607, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 20.491390228271484, + "learning_rate": 1.229591836734694e-05, + "loss": 2.0677, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 25.128101348876953, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.9233, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 18.843276977539062, + "learning_rate": 1.219387755102041e-05, + "loss": 1.781, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 24.99994659423828, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.962, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 20.679218292236328, + "learning_rate": 1.2091836734693878e-05, + "loss": 2.0055, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 26.00550651550293, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.9761, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 33.80900192260742, + "learning_rate": 1.1989795918367348e-05, + "loss": 2.0502, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 25.639009475708008, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.9088, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 17.48627471923828, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.9359, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 23.16074562072754, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.8647, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 25.39946174621582, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.8523, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 25.8050537109375, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.8403, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 20.019033432006836, + "learning_rate": 1.1683673469387755e-05, + "loss": 2.0023, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 26.194847106933594, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.9429, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 21.064212799072266, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.8302, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 21.876129150390625, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.8881, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 33.61103439331055, + "learning_rate": 1.1479591836734697e-05, + "loss": 2.0497, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 27.204744338989258, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.8431, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 21.605751037597656, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.9149, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 30.307472229003906, + "learning_rate": 1.1326530612244899e-05, + "loss": 2.0313, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 23.69244384765625, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.8676, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 25.619901657104492, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.7905, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 28.0296573638916, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.8567, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 36.4359130859375, + "learning_rate": 1.1122448979591838e-05, + "loss": 2.115, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 26.91726303100586, + "learning_rate": 1.1071428571428572e-05, + "loss": 2.0642, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 23.085880279541016, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.9109, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 27.870641708374023, + "learning_rate": 1.096938775510204e-05, + "loss": 1.8999, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 32.0672607421875, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.904, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 28.879365921020508, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.7159, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 27.592771530151367, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.8561, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 27.412763595581055, + "learning_rate": 1.076530612244898e-05, + "loss": 1.9282, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 30.12356185913086, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.8726, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 39.9027214050293, + "learning_rate": 1.066326530612245e-05, + "loss": 1.7551, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 30.483945846557617, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.7643, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 26.00415802001953, + "learning_rate": 1.0561224489795918e-05, + "loss": 2.0552, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 23.03282356262207, + "learning_rate": 1.0510204081632654e-05, + "loss": 2.0052, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 33.653221130371094, + "learning_rate": 1.045918367346939e-05, + "loss": 1.7367, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 39.59351348876953, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.9726, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.77714920043945, + "learning_rate": 1.0357142857142859e-05, + "loss": 2.0906, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 33.194549560546875, + "learning_rate": 1.0306122448979591e-05, + "loss": 2.0742, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 25.10793685913086, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.9204, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 40.048404693603516, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.7775, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 26.085933685302734, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.9459, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 18.375, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.9536, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.8626214265823364, + "eval_runtime": 6.6508, + "eval_samples_per_second": 106.002, + "eval_steps_per_second": 53.076, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 33.858341217041016, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.8191, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 22.895992279052734, + "learning_rate": 1e-05, + "loss": 2.0596, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 30.55072593688965, + "learning_rate": 9.948979591836737e-06, + "loss": 1.8904, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 26.542705535888672, + "learning_rate": 9.89795918367347e-06, + "loss": 1.9683, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 39.81034851074219, + "learning_rate": 9.846938775510205e-06, + "loss": 1.9726, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 22.0065860748291, + "learning_rate": 9.795918367346939e-06, + "loss": 2.0575, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 19.012041091918945, + "learning_rate": 9.744897959183674e-06, + "loss": 1.8431, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 39.699974060058594, + "learning_rate": 9.693877551020408e-06, + "loss": 1.8231, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 21.391319274902344, + "learning_rate": 9.642857142857144e-06, + "loss": 1.7939, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 25.8063907623291, + "learning_rate": 9.591836734693878e-06, + "loss": 2.0366, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 20.598569869995117, + "learning_rate": 9.540816326530612e-06, + "loss": 1.8323, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 29.391401290893555, + "learning_rate": 9.489795918367348e-06, + "loss": 2.0052, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 24.39499855041504, + "learning_rate": 9.438775510204082e-06, + "loss": 1.8461, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 24.16887092590332, + "learning_rate": 9.387755102040818e-06, + "loss": 1.9404, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 24.577871322631836, + "learning_rate": 9.336734693877552e-06, + "loss": 1.9202, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 26.117361068725586, + "learning_rate": 9.285714285714288e-06, + "loss": 1.921, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 22.586837768554688, + "learning_rate": 9.234693877551022e-06, + "loss": 1.9692, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 18.438722610473633, + "learning_rate": 9.183673469387756e-06, + "loss": 1.9496, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 22.94545555114746, + "learning_rate": 9.13265306122449e-06, + "loss": 1.991, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 28.664562225341797, + "learning_rate": 9.081632653061225e-06, + "loss": 1.8352, + "step": 220 + }, + { + "epoch": 0.5573770491803278, + "grad_norm": 25.63576316833496, + "learning_rate": 9.03061224489796e-06, + "loss": 1.9399, + "step": 221 + }, + { + "epoch": 0.5598991172761665, + "grad_norm": 21.650251388549805, + "learning_rate": 8.979591836734695e-06, + "loss": 1.9565, + "step": 222 + }, + { + "epoch": 0.562421185372005, + "grad_norm": 29.605735778808594, + "learning_rate": 8.92857142857143e-06, + "loss": 1.729, + "step": 223 + }, + { + "epoch": 0.5649432534678437, + "grad_norm": 23.98230743408203, + "learning_rate": 8.877551020408163e-06, + "loss": 1.9399, + "step": 224 + }, + { + "epoch": 0.5674653215636822, + "grad_norm": 20.37510108947754, + "learning_rate": 8.826530612244899e-06, + "loss": 1.7322, + "step": 225 + }, + { + "epoch": 0.5699873896595208, + "grad_norm": 25.876188278198242, + "learning_rate": 8.775510204081633e-06, + "loss": 1.9444, + "step": 226 + }, + { + "epoch": 0.5725094577553594, + "grad_norm": 32.07249069213867, + "learning_rate": 8.724489795918369e-06, + "loss": 1.9364, + "step": 227 + }, + { + "epoch": 0.575031525851198, + "grad_norm": 28.014524459838867, + "learning_rate": 8.673469387755103e-06, + "loss": 1.757, + "step": 228 + }, + { + "epoch": 0.5775535939470365, + "grad_norm": 30.82647132873535, + "learning_rate": 8.622448979591837e-06, + "loss": 1.9067, + "step": 229 + }, + { + "epoch": 0.5800756620428752, + "grad_norm": 30.651660919189453, + "learning_rate": 8.571428571428571e-06, + "loss": 1.9906, + "step": 230 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3355988069253120.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-230/training_args.bin b/checkpoints/checkpoint-230/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..866f8d7e39db21daf30a11de7c14a3ccb8f2e9aa --- /dev/null +++ b/checkpoints/checkpoint-230/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:591380c2089627b1aa728bfd52abdb50afeaafc7a7f48353dede4e443fc370d0 +size 5969 diff --git a/checkpoints/checkpoint-240/README.md b/checkpoints/checkpoint-240/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b974a935220f493e52a3da346206a652479b4735 --- /dev/null +++ b/checkpoints/checkpoint-240/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.2-1B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-240/adapter_config.json b/checkpoints/checkpoint-240/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a0d31afa2216e2d6d2237fc15eef0c8c03ca674 --- /dev/null +++ b/checkpoints/checkpoint-240/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.2-1B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-240/adapter_model.safetensors b/checkpoints/checkpoint-240/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..97c87fe3a64aa145a5b271a2a5f30f93b2491c49 --- /dev/null +++ b/checkpoints/checkpoint-240/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c950b740ba7beebde56877b48c0caf2f5cb9b130c27efcb70716480572cc8d0 +size 41248 diff --git a/checkpoints/checkpoint-240/chat_template.jinja b/checkpoints/checkpoint-240/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..1bad6a0f648dccdbec523ca79ba90fbcfc806af0 --- /dev/null +++ b/checkpoints/checkpoint-240/chat_template.jinja @@ -0,0 +1,93 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- if strftime_now is defined %} + {%- set date_string = strftime_now("%d %b %Y") %} + {%- else %} + {%- set date_string = "26 Jul 2024" %} + {%- endif %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {{- "<|eot_id|>" }} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-240/optimizer.pt b/checkpoints/checkpoint-240/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..ca0e63d15d2afc3aada9e660c054f6a12aeb5adb --- /dev/null +++ b/checkpoints/checkpoint-240/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a71e4671fdb72f7d71b2e4f4a498affa12f1ad6f1bc9bc3ecfb38a26b8ec9c0 +size 38889 diff --git a/checkpoints/checkpoint-240/rng_state.pth b/checkpoints/checkpoint-240/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..27efe79200462dc7ec248017016a4968efc39ca1 --- /dev/null +++ b/checkpoints/checkpoint-240/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4684d51fc75f779aa7923c633e4c75a716ac73280dc8e9335cb1439e364bde19 +size 14581 diff --git a/checkpoints/checkpoint-240/scheduler.pt b/checkpoints/checkpoint-240/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b453ebb7919681d9420ce2f09571bc24b64acabb --- /dev/null +++ b/checkpoints/checkpoint-240/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49c3ddf88294d43f57a6768468ea07b58af47f33321a2bbc8e26dc0c097bdcbc +size 1465 diff --git a/checkpoints/checkpoint-240/special_tokens_map.json b/checkpoints/checkpoint-240/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-240/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-240/tokenizer.json b/checkpoints/checkpoint-240/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-240/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-240/tokenizer_config.json b/checkpoints/checkpoint-240/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-240/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-240/trainer_state.json b/checkpoints/checkpoint-240/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3090cc9cab6ed13967d9b84119d0e6bc34fb279a --- /dev/null +++ b/checkpoints/checkpoint-240/trainer_state.json @@ -0,0 +1,1730 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.605296343001261, + "eval_steps": 100, + "global_step": 240, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 25.350475311279297, + "learning_rate": 0.0, + "loss": 2.5568, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 24.538068771362305, + "learning_rate": 4.000000000000001e-06, + "loss": 2.7748, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 23.780784606933594, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5911, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 24.780380249023438, + "learning_rate": 1.2e-05, + "loss": 2.8427, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 22.699949264526367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.709, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 22.106008529663086, + "learning_rate": 2e-05, + "loss": 2.5854, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 22.497045516967773, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.5427, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 27.103275299072266, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.6599, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 21.081985473632812, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.5145, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 25.964981079101562, + "learning_rate": 1.979591836734694e-05, + "loss": 2.4247, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 25.353195190429688, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.5092, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 18.94191551208496, + "learning_rate": 1.969387755102041e-05, + "loss": 2.4335, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 23.60140037536621, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.544, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 24.298965454101562, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.4987, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.745506286621094, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.4985, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 22.54330062866211, + "learning_rate": 1.948979591836735e-05, + "loss": 2.6892, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 21.46229362487793, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.3998, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 20.54530143737793, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.4244, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 18.8839111328125, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.3911, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.924652099609375, + "learning_rate": 1.928571428571429e-05, + "loss": 2.3588, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.996627807617188, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.3727, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 18.584613800048828, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2974, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 14.309200286865234, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.4843, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.074164390563965, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.4043, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 13.610542297363281, + "learning_rate": 1.903061224489796e-05, + "loss": 2.3762, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 15.666613578796387, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.3249, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 14.475164413452148, + "learning_rate": 1.892857142857143e-05, + "loss": 2.3317, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 16.231687545776367, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.5064, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 16.8968563079834, + "learning_rate": 1.88265306122449e-05, + "loss": 2.3932, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 17.74305534362793, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.3329, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 16.41620445251465, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.3982, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 17.965959548950195, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.4227, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 19.92589569091797, + "learning_rate": 1.862244897959184e-05, + "loss": 2.5255, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 20.62932586669922, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.1816, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 18.360614776611328, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.2827, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 19.199546813964844, + "learning_rate": 1.8469387755102043e-05, + "loss": 2.1498, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 22.727521896362305, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.3811, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 19.80649757385254, + "learning_rate": 1.836734693877551e-05, + "loss": 2.2342, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.24563217163086, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.2287, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 25.384042739868164, + "learning_rate": 1.826530612244898e-05, + "loss": 2.1259, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 23.417089462280273, + "learning_rate": 1.8214285714285715e-05, + "loss": 2.0858, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 27.639497756958008, + "learning_rate": 1.816326530612245e-05, + "loss": 2.2243, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 27.390850067138672, + "learning_rate": 1.8112244897959187e-05, + "loss": 2.1314, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 27.956937789916992, + "learning_rate": 1.806122448979592e-05, + "loss": 2.1755, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 32.09632873535156, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.2365, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 33.84647750854492, + "learning_rate": 1.795918367346939e-05, + "loss": 2.1671, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 32.027130126953125, + "learning_rate": 1.7908163265306123e-05, + "loss": 2.09, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 35.423587799072266, + "learning_rate": 1.785714285714286e-05, + "loss": 2.2479, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 31.041240692138672, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9687, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 28.790103912353516, + "learning_rate": 1.7755102040816327e-05, + "loss": 2.1428, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.089313507080078, + "learning_rate": 1.7704081632653062e-05, + "loss": 2.0673, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 26.493867874145508, + "learning_rate": 1.7653061224489798e-05, + "loss": 2.0814, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 19.993173599243164, + "learning_rate": 1.760204081632653e-05, + "loss": 2.005, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 21.89765167236328, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.2262, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 23.22844123840332, + "learning_rate": 1.7500000000000002e-05, + "loss": 2.0208, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 15.864526748657227, + "learning_rate": 1.7448979591836738e-05, + "loss": 2.0153, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 21.451187133789062, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.1386, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 18.089811325073242, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.9517, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 24.029157638549805, + "learning_rate": 1.729591836734694e-05, + "loss": 1.9719, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 18.722776412963867, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0623, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 20.211933135986328, + "learning_rate": 1.719387755102041e-05, + "loss": 2.0081, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 17.61188507080078, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.8484, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 20.118955612182617, + "learning_rate": 1.7091836734693878e-05, + "loss": 2.0799, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 17.271841049194336, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.9832, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 19.521392822265625, + "learning_rate": 1.698979591836735e-05, + "loss": 1.9129, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 22.660900115966797, + "learning_rate": 1.6938775510204085e-05, + "loss": 2.118, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 17.332427978515625, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.9632, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 22.42765998840332, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.954, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 23.6208553314209, + "learning_rate": 1.678571428571429e-05, + "loss": 1.9917, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 19.78505516052246, + "learning_rate": 1.673469387755102e-05, + "loss": 1.7964, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 19.453041076660156, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.9587, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 24.731407165527344, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.9945, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 20.977611541748047, + "learning_rate": 1.6581632653061225e-05, + "loss": 2.0617, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 22.959585189819336, + "learning_rate": 1.653061224489796e-05, + "loss": 1.98, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 21.952653884887695, + "learning_rate": 1.6479591836734696e-05, + "loss": 2.1094, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 22.320383071899414, + "learning_rate": 1.642857142857143e-05, + "loss": 1.8418, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 24.375411987304688, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.8428, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 19.64323616027832, + "learning_rate": 1.63265306122449e-05, + "loss": 1.9194, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 22.459064483642578, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.6649, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 36.789764404296875, + "learning_rate": 1.6224489795918368e-05, + "loss": 2.0131, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 22.109119415283203, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.9603, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 19.196834564208984, + "learning_rate": 1.612244897959184e-05, + "loss": 2.0538, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 26.870800018310547, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.9168, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 35.190696716308594, + "learning_rate": 1.6020408163265308e-05, + "loss": 2.0149, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 19.963472366333008, + "learning_rate": 1.596938775510204e-05, + "loss": 1.7871, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 20.292407989501953, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.944, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 20.55329132080078, + "learning_rate": 1.586734693877551e-05, + "loss": 2.0175, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 17.27350616455078, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.9308, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 22.471134185791016, + "learning_rate": 1.576530612244898e-05, + "loss": 1.9221, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 25.098316192626953, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.9359, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 25.125213623046875, + "learning_rate": 1.566326530612245e-05, + "loss": 2.0087, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 20.038599014282227, + "learning_rate": 1.5612244897959187e-05, + "loss": 2.0939, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 19.016841888427734, + "learning_rate": 1.556122448979592e-05, + "loss": 2.0183, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 21.97820472717285, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.8239, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 25.578901290893555, + "learning_rate": 1.545918367346939e-05, + "loss": 1.9388, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 23.74614143371582, + "learning_rate": 1.5408163265306123e-05, + "loss": 2.0492, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 22.203304290771484, + "learning_rate": 1.535714285714286e-05, + "loss": 1.941, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 21.39324188232422, + "learning_rate": 1.530612244897959e-05, + "loss": 1.9042, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 18.99315643310547, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.88, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 24.22341537475586, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.8147, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.8966256380081177, + "eval_runtime": 6.9787, + "eval_samples_per_second": 101.022, + "eval_steps_per_second": 50.583, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 18.296152114868164, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.8605, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 26.404766082763672, + "learning_rate": 1.510204081632653e-05, + "loss": 2.1195, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 19.187122344970703, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.9284, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 20.79934310913086, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.725, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 23.833288192749023, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.9215, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 22.301727294921875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.8728, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 23.685596466064453, + "learning_rate": 1.4846938775510204e-05, + "loss": 2.0482, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 18.969186782836914, + "learning_rate": 1.479591836734694e-05, + "loss": 1.8724, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 23.994483947753906, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.9542, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 16.84621238708496, + "learning_rate": 1.469387755102041e-05, + "loss": 1.9703, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 23.411087036132812, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.9836, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 29.55487632751465, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.9124, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 32.28921127319336, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.8566, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 24.007558822631836, + "learning_rate": 1.448979591836735e-05, + "loss": 1.8296, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 26.753524780273438, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.7181, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 22.49270248413086, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.8741, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 28.006656646728516, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.9151, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 17.606775283813477, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.9654, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 29.94802474975586, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.8849, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 28.27743148803711, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.8006, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 19.11652183532715, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.8539, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 24.255807876586914, + "learning_rate": 1.4081632653061225e-05, + "loss": 2.0162, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 22.508352279663086, + "learning_rate": 1.403061224489796e-05, + "loss": 1.858, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 27.028772354125977, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.8175, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 22.697704315185547, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.9789, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 31.604068756103516, + "learning_rate": 1.3877551020408165e-05, + "loss": 2.0039, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 27.71053695678711, + "learning_rate": 1.38265306122449e-05, + "loss": 1.9867, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 17.37586784362793, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.6931, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 20.28536605834961, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.9199, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 29.4377384185791, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.9322, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 17.703046798706055, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.8842, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 30.14008140563965, + "learning_rate": 1.3571428571428574e-05, + "loss": 2.1228, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 29.657262802124023, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.8929, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 18.243854522705078, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.8811, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 33.22247314453125, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.9814, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 26.413856506347656, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.9329, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 20.56089210510254, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.8944, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 19.480737686157227, + "learning_rate": 1.326530612244898e-05, + "loss": 1.9221, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 22.788074493408203, + "learning_rate": 1.3214285714285716e-05, + "loss": 2.0445, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 20.722291946411133, + "learning_rate": 1.316326530612245e-05, + "loss": 1.9998, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 25.190189361572266, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.8076, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 23.203886032104492, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.7821, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 25.32374382019043, + "learning_rate": 1.3010204081632653e-05, + "loss": 2.0356, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 28.798864364624023, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.8202, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 24.93810272216797, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.9237, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 36.78353500366211, + "learning_rate": 1.2857142857142859e-05, + "loss": 2.0019, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 28.510663986206055, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.9268, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 38.19087219238281, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.9366, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 20.796728134155273, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.8731, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 23.036758422851562, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.8835, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 27.058195114135742, + "learning_rate": 1.260204081632653e-05, + "loss": 1.8013, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 25.390460968017578, + "learning_rate": 1.2551020408163267e-05, + "loss": 2.0623, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 27.993654251098633, + "learning_rate": 1.25e-05, + "loss": 1.6895, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 24.15807342529297, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.9799, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 24.369815826416016, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.9687, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 24.572988510131836, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.8607, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 20.491390228271484, + "learning_rate": 1.229591836734694e-05, + "loss": 2.0677, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 25.128101348876953, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.9233, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 18.843276977539062, + "learning_rate": 1.219387755102041e-05, + "loss": 1.781, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 24.99994659423828, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.962, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 20.679218292236328, + "learning_rate": 1.2091836734693878e-05, + "loss": 2.0055, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 26.00550651550293, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.9761, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 33.80900192260742, + "learning_rate": 1.1989795918367348e-05, + "loss": 2.0502, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 25.639009475708008, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.9088, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 17.48627471923828, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.9359, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 23.16074562072754, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.8647, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 25.39946174621582, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.8523, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 25.8050537109375, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.8403, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 20.019033432006836, + "learning_rate": 1.1683673469387755e-05, + "loss": 2.0023, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 26.194847106933594, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.9429, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 21.064212799072266, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.8302, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 21.876129150390625, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.8881, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 33.61103439331055, + "learning_rate": 1.1479591836734697e-05, + "loss": 2.0497, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 27.204744338989258, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.8431, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 21.605751037597656, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.9149, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 30.307472229003906, + "learning_rate": 1.1326530612244899e-05, + "loss": 2.0313, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 23.69244384765625, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.8676, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 25.619901657104492, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.7905, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 28.0296573638916, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.8567, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 36.4359130859375, + "learning_rate": 1.1122448979591838e-05, + "loss": 2.115, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 26.91726303100586, + "learning_rate": 1.1071428571428572e-05, + "loss": 2.0642, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 23.085880279541016, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.9109, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 27.870641708374023, + "learning_rate": 1.096938775510204e-05, + "loss": 1.8999, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 32.0672607421875, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.904, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 28.879365921020508, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.7159, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 27.592771530151367, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.8561, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 27.412763595581055, + "learning_rate": 1.076530612244898e-05, + "loss": 1.9282, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 30.12356185913086, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.8726, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 39.9027214050293, + "learning_rate": 1.066326530612245e-05, + "loss": 1.7551, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 30.483945846557617, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.7643, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 26.00415802001953, + "learning_rate": 1.0561224489795918e-05, + "loss": 2.0552, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 23.03282356262207, + "learning_rate": 1.0510204081632654e-05, + "loss": 2.0052, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 33.653221130371094, + "learning_rate": 1.045918367346939e-05, + "loss": 1.7367, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 39.59351348876953, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.9726, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.77714920043945, + "learning_rate": 1.0357142857142859e-05, + "loss": 2.0906, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 33.194549560546875, + "learning_rate": 1.0306122448979591e-05, + "loss": 2.0742, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 25.10793685913086, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.9204, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 40.048404693603516, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.7775, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 26.085933685302734, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.9459, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 18.375, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.9536, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.8626214265823364, + "eval_runtime": 6.6508, + "eval_samples_per_second": 106.002, + "eval_steps_per_second": 53.076, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 33.858341217041016, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.8191, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 22.895992279052734, + "learning_rate": 1e-05, + "loss": 2.0596, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 30.55072593688965, + "learning_rate": 9.948979591836737e-06, + "loss": 1.8904, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 26.542705535888672, + "learning_rate": 9.89795918367347e-06, + "loss": 1.9683, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 39.81034851074219, + "learning_rate": 9.846938775510205e-06, + "loss": 1.9726, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 22.0065860748291, + "learning_rate": 9.795918367346939e-06, + "loss": 2.0575, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 19.012041091918945, + "learning_rate": 9.744897959183674e-06, + "loss": 1.8431, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 39.699974060058594, + "learning_rate": 9.693877551020408e-06, + "loss": 1.8231, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 21.391319274902344, + "learning_rate": 9.642857142857144e-06, + "loss": 1.7939, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 25.8063907623291, + "learning_rate": 9.591836734693878e-06, + "loss": 2.0366, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 20.598569869995117, + "learning_rate": 9.540816326530612e-06, + "loss": 1.8323, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 29.391401290893555, + "learning_rate": 9.489795918367348e-06, + "loss": 2.0052, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 24.39499855041504, + "learning_rate": 9.438775510204082e-06, + "loss": 1.8461, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 24.16887092590332, + "learning_rate": 9.387755102040818e-06, + "loss": 1.9404, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 24.577871322631836, + "learning_rate": 9.336734693877552e-06, + "loss": 1.9202, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 26.117361068725586, + "learning_rate": 9.285714285714288e-06, + "loss": 1.921, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 22.586837768554688, + "learning_rate": 9.234693877551022e-06, + "loss": 1.9692, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 18.438722610473633, + "learning_rate": 9.183673469387756e-06, + "loss": 1.9496, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 22.94545555114746, + "learning_rate": 9.13265306122449e-06, + "loss": 1.991, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 28.664562225341797, + "learning_rate": 9.081632653061225e-06, + "loss": 1.8352, + "step": 220 + }, + { + "epoch": 0.5573770491803278, + "grad_norm": 25.63576316833496, + "learning_rate": 9.03061224489796e-06, + "loss": 1.9399, + "step": 221 + }, + { + "epoch": 0.5598991172761665, + "grad_norm": 21.650251388549805, + "learning_rate": 8.979591836734695e-06, + "loss": 1.9565, + "step": 222 + }, + { + "epoch": 0.562421185372005, + "grad_norm": 29.605735778808594, + "learning_rate": 8.92857142857143e-06, + "loss": 1.729, + "step": 223 + }, + { + "epoch": 0.5649432534678437, + "grad_norm": 23.98230743408203, + "learning_rate": 8.877551020408163e-06, + "loss": 1.9399, + "step": 224 + }, + { + "epoch": 0.5674653215636822, + "grad_norm": 20.37510108947754, + "learning_rate": 8.826530612244899e-06, + "loss": 1.7322, + "step": 225 + }, + { + "epoch": 0.5699873896595208, + "grad_norm": 25.876188278198242, + "learning_rate": 8.775510204081633e-06, + "loss": 1.9444, + "step": 226 + }, + { + "epoch": 0.5725094577553594, + "grad_norm": 32.07249069213867, + "learning_rate": 8.724489795918369e-06, + "loss": 1.9364, + "step": 227 + }, + { + "epoch": 0.575031525851198, + "grad_norm": 28.014524459838867, + "learning_rate": 8.673469387755103e-06, + "loss": 1.757, + "step": 228 + }, + { + "epoch": 0.5775535939470365, + "grad_norm": 30.82647132873535, + "learning_rate": 8.622448979591837e-06, + "loss": 1.9067, + "step": 229 + }, + { + "epoch": 0.5800756620428752, + "grad_norm": 30.651660919189453, + "learning_rate": 8.571428571428571e-06, + "loss": 1.9906, + "step": 230 + }, + { + "epoch": 0.5825977301387137, + "grad_norm": 25.239904403686523, + "learning_rate": 8.520408163265307e-06, + "loss": 1.8914, + "step": 231 + }, + { + "epoch": 0.5851197982345523, + "grad_norm": 21.33747673034668, + "learning_rate": 8.469387755102042e-06, + "loss": 1.9999, + "step": 232 + }, + { + "epoch": 0.587641866330391, + "grad_norm": 25.255064010620117, + "learning_rate": 8.418367346938776e-06, + "loss": 1.8941, + "step": 233 + }, + { + "epoch": 0.5901639344262295, + "grad_norm": 24.443973541259766, + "learning_rate": 8.36734693877551e-06, + "loss": 1.7679, + "step": 234 + }, + { + "epoch": 0.592686002522068, + "grad_norm": 25.473894119262695, + "learning_rate": 8.316326530612246e-06, + "loss": 1.7876, + "step": 235 + }, + { + "epoch": 0.5952080706179067, + "grad_norm": 26.28467559814453, + "learning_rate": 8.26530612244898e-06, + "loss": 1.6761, + "step": 236 + }, + { + "epoch": 0.5977301387137453, + "grad_norm": 24.488052368164062, + "learning_rate": 8.214285714285714e-06, + "loss": 2.0022, + "step": 237 + }, + { + "epoch": 0.6002522068095839, + "grad_norm": 30.074064254760742, + "learning_rate": 8.16326530612245e-06, + "loss": 1.7747, + "step": 238 + }, + { + "epoch": 0.6027742749054225, + "grad_norm": 23.73440170288086, + "learning_rate": 8.112244897959184e-06, + "loss": 1.8468, + "step": 239 + }, + { + "epoch": 0.605296343001261, + "grad_norm": 22.338869094848633, + "learning_rate": 8.06122448979592e-06, + "loss": 1.8611, + "step": 240 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3503152665427968.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-240/training_args.bin b/checkpoints/checkpoint-240/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..866f8d7e39db21daf30a11de7c14a3ccb8f2e9aa --- /dev/null +++ b/checkpoints/checkpoint-240/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:591380c2089627b1aa728bfd52abdb50afeaafc7a7f48353dede4e443fc370d0 +size 5969 diff --git a/checkpoints/checkpoint-250/README.md b/checkpoints/checkpoint-250/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b974a935220f493e52a3da346206a652479b4735 --- /dev/null +++ b/checkpoints/checkpoint-250/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.2-1B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-250/adapter_config.json b/checkpoints/checkpoint-250/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a0d31afa2216e2d6d2237fc15eef0c8c03ca674 --- /dev/null +++ b/checkpoints/checkpoint-250/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.2-1B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-250/adapter_model.safetensors b/checkpoints/checkpoint-250/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..99253be290d3f74f498825426e9e224715420654 --- /dev/null +++ b/checkpoints/checkpoint-250/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47bd8a7f2e6f07def764d9e272eac3b79a67fe3f54f39efeb0f25b0ad78246c5 +size 41248 diff --git a/checkpoints/checkpoint-250/chat_template.jinja b/checkpoints/checkpoint-250/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..1bad6a0f648dccdbec523ca79ba90fbcfc806af0 --- /dev/null +++ b/checkpoints/checkpoint-250/chat_template.jinja @@ -0,0 +1,93 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- if strftime_now is defined %} + {%- set date_string = strftime_now("%d %b %Y") %} + {%- else %} + {%- set date_string = "26 Jul 2024" %} + {%- endif %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {{- "<|eot_id|>" }} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-250/optimizer.pt b/checkpoints/checkpoint-250/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..68c54370bfe8766e1a4184cdd8bdcb9feada9b66 --- /dev/null +++ b/checkpoints/checkpoint-250/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ac0629b5d48b86c89efa64c9c560ffc43e464b58539a4ec69b551dae2ecdaab +size 38889 diff --git a/checkpoints/checkpoint-250/rng_state.pth b/checkpoints/checkpoint-250/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..27efe79200462dc7ec248017016a4968efc39ca1 --- /dev/null +++ b/checkpoints/checkpoint-250/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4684d51fc75f779aa7923c633e4c75a716ac73280dc8e9335cb1439e364bde19 +size 14581 diff --git a/checkpoints/checkpoint-250/scheduler.pt b/checkpoints/checkpoint-250/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..5774913c84b88ce675c31ba8bdd32480bc6a99c8 --- /dev/null +++ b/checkpoints/checkpoint-250/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcbf2f9e120fc08853a4c1b6bafbdb5b9efe6409c69b08cd72378e1544d14e68 +size 1465 diff --git a/checkpoints/checkpoint-250/special_tokens_map.json b/checkpoints/checkpoint-250/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-250/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-250/tokenizer.json b/checkpoints/checkpoint-250/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-250/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-250/tokenizer_config.json b/checkpoints/checkpoint-250/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-250/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-250/trainer_state.json b/checkpoints/checkpoint-250/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..35d990716362586634c61318723b2cd5041317ab --- /dev/null +++ b/checkpoints/checkpoint-250/trainer_state.json @@ -0,0 +1,1800 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.6305170239596469, + "eval_steps": 100, + "global_step": 250, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 25.350475311279297, + "learning_rate": 0.0, + "loss": 2.5568, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 24.538068771362305, + "learning_rate": 4.000000000000001e-06, + "loss": 2.7748, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 23.780784606933594, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5911, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 24.780380249023438, + "learning_rate": 1.2e-05, + "loss": 2.8427, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 22.699949264526367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.709, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 22.106008529663086, + "learning_rate": 2e-05, + "loss": 2.5854, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 22.497045516967773, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.5427, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 27.103275299072266, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.6599, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 21.081985473632812, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.5145, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 25.964981079101562, + "learning_rate": 1.979591836734694e-05, + "loss": 2.4247, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 25.353195190429688, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.5092, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 18.94191551208496, + "learning_rate": 1.969387755102041e-05, + "loss": 2.4335, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 23.60140037536621, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.544, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 24.298965454101562, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.4987, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.745506286621094, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.4985, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 22.54330062866211, + "learning_rate": 1.948979591836735e-05, + "loss": 2.6892, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 21.46229362487793, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.3998, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 20.54530143737793, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.4244, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 18.8839111328125, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.3911, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.924652099609375, + "learning_rate": 1.928571428571429e-05, + "loss": 2.3588, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.996627807617188, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.3727, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 18.584613800048828, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2974, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 14.309200286865234, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.4843, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.074164390563965, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.4043, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 13.610542297363281, + "learning_rate": 1.903061224489796e-05, + "loss": 2.3762, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 15.666613578796387, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.3249, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 14.475164413452148, + "learning_rate": 1.892857142857143e-05, + "loss": 2.3317, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 16.231687545776367, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.5064, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 16.8968563079834, + "learning_rate": 1.88265306122449e-05, + "loss": 2.3932, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 17.74305534362793, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.3329, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 16.41620445251465, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.3982, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 17.965959548950195, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.4227, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 19.92589569091797, + "learning_rate": 1.862244897959184e-05, + "loss": 2.5255, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 20.62932586669922, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.1816, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 18.360614776611328, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.2827, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 19.199546813964844, + "learning_rate": 1.8469387755102043e-05, + "loss": 2.1498, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 22.727521896362305, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.3811, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 19.80649757385254, + "learning_rate": 1.836734693877551e-05, + "loss": 2.2342, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.24563217163086, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.2287, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 25.384042739868164, + "learning_rate": 1.826530612244898e-05, + "loss": 2.1259, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 23.417089462280273, + "learning_rate": 1.8214285714285715e-05, + "loss": 2.0858, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 27.639497756958008, + "learning_rate": 1.816326530612245e-05, + "loss": 2.2243, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 27.390850067138672, + "learning_rate": 1.8112244897959187e-05, + "loss": 2.1314, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 27.956937789916992, + "learning_rate": 1.806122448979592e-05, + "loss": 2.1755, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 32.09632873535156, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.2365, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 33.84647750854492, + "learning_rate": 1.795918367346939e-05, + "loss": 2.1671, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 32.027130126953125, + "learning_rate": 1.7908163265306123e-05, + "loss": 2.09, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 35.423587799072266, + "learning_rate": 1.785714285714286e-05, + "loss": 2.2479, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 31.041240692138672, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9687, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 28.790103912353516, + "learning_rate": 1.7755102040816327e-05, + "loss": 2.1428, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.089313507080078, + "learning_rate": 1.7704081632653062e-05, + "loss": 2.0673, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 26.493867874145508, + "learning_rate": 1.7653061224489798e-05, + "loss": 2.0814, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 19.993173599243164, + "learning_rate": 1.760204081632653e-05, + "loss": 2.005, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 21.89765167236328, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.2262, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 23.22844123840332, + "learning_rate": 1.7500000000000002e-05, + "loss": 2.0208, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 15.864526748657227, + "learning_rate": 1.7448979591836738e-05, + "loss": 2.0153, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 21.451187133789062, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.1386, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 18.089811325073242, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.9517, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 24.029157638549805, + "learning_rate": 1.729591836734694e-05, + "loss": 1.9719, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 18.722776412963867, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0623, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 20.211933135986328, + "learning_rate": 1.719387755102041e-05, + "loss": 2.0081, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 17.61188507080078, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.8484, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 20.118955612182617, + "learning_rate": 1.7091836734693878e-05, + "loss": 2.0799, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 17.271841049194336, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.9832, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 19.521392822265625, + "learning_rate": 1.698979591836735e-05, + "loss": 1.9129, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 22.660900115966797, + "learning_rate": 1.6938775510204085e-05, + "loss": 2.118, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 17.332427978515625, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.9632, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 22.42765998840332, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.954, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 23.6208553314209, + "learning_rate": 1.678571428571429e-05, + "loss": 1.9917, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 19.78505516052246, + "learning_rate": 1.673469387755102e-05, + "loss": 1.7964, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 19.453041076660156, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.9587, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 24.731407165527344, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.9945, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 20.977611541748047, + "learning_rate": 1.6581632653061225e-05, + "loss": 2.0617, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 22.959585189819336, + "learning_rate": 1.653061224489796e-05, + "loss": 1.98, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 21.952653884887695, + "learning_rate": 1.6479591836734696e-05, + "loss": 2.1094, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 22.320383071899414, + "learning_rate": 1.642857142857143e-05, + "loss": 1.8418, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 24.375411987304688, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.8428, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 19.64323616027832, + "learning_rate": 1.63265306122449e-05, + "loss": 1.9194, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 22.459064483642578, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.6649, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 36.789764404296875, + "learning_rate": 1.6224489795918368e-05, + "loss": 2.0131, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 22.109119415283203, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.9603, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 19.196834564208984, + "learning_rate": 1.612244897959184e-05, + "loss": 2.0538, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 26.870800018310547, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.9168, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 35.190696716308594, + "learning_rate": 1.6020408163265308e-05, + "loss": 2.0149, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 19.963472366333008, + "learning_rate": 1.596938775510204e-05, + "loss": 1.7871, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 20.292407989501953, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.944, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 20.55329132080078, + "learning_rate": 1.586734693877551e-05, + "loss": 2.0175, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 17.27350616455078, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.9308, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 22.471134185791016, + "learning_rate": 1.576530612244898e-05, + "loss": 1.9221, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 25.098316192626953, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.9359, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 25.125213623046875, + "learning_rate": 1.566326530612245e-05, + "loss": 2.0087, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 20.038599014282227, + "learning_rate": 1.5612244897959187e-05, + "loss": 2.0939, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 19.016841888427734, + "learning_rate": 1.556122448979592e-05, + "loss": 2.0183, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 21.97820472717285, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.8239, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 25.578901290893555, + "learning_rate": 1.545918367346939e-05, + "loss": 1.9388, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 23.74614143371582, + "learning_rate": 1.5408163265306123e-05, + "loss": 2.0492, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 22.203304290771484, + "learning_rate": 1.535714285714286e-05, + "loss": 1.941, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 21.39324188232422, + "learning_rate": 1.530612244897959e-05, + "loss": 1.9042, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 18.99315643310547, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.88, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 24.22341537475586, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.8147, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.8966256380081177, + "eval_runtime": 6.9787, + "eval_samples_per_second": 101.022, + "eval_steps_per_second": 50.583, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 18.296152114868164, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.8605, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 26.404766082763672, + "learning_rate": 1.510204081632653e-05, + "loss": 2.1195, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 19.187122344970703, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.9284, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 20.79934310913086, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.725, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 23.833288192749023, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.9215, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 22.301727294921875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.8728, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 23.685596466064453, + "learning_rate": 1.4846938775510204e-05, + "loss": 2.0482, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 18.969186782836914, + "learning_rate": 1.479591836734694e-05, + "loss": 1.8724, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 23.994483947753906, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.9542, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 16.84621238708496, + "learning_rate": 1.469387755102041e-05, + "loss": 1.9703, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 23.411087036132812, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.9836, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 29.55487632751465, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.9124, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 32.28921127319336, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.8566, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 24.007558822631836, + "learning_rate": 1.448979591836735e-05, + "loss": 1.8296, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 26.753524780273438, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.7181, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 22.49270248413086, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.8741, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 28.006656646728516, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.9151, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 17.606775283813477, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.9654, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 29.94802474975586, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.8849, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 28.27743148803711, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.8006, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 19.11652183532715, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.8539, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 24.255807876586914, + "learning_rate": 1.4081632653061225e-05, + "loss": 2.0162, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 22.508352279663086, + "learning_rate": 1.403061224489796e-05, + "loss": 1.858, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 27.028772354125977, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.8175, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 22.697704315185547, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.9789, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 31.604068756103516, + "learning_rate": 1.3877551020408165e-05, + "loss": 2.0039, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 27.71053695678711, + "learning_rate": 1.38265306122449e-05, + "loss": 1.9867, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 17.37586784362793, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.6931, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 20.28536605834961, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.9199, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 29.4377384185791, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.9322, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 17.703046798706055, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.8842, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 30.14008140563965, + "learning_rate": 1.3571428571428574e-05, + "loss": 2.1228, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 29.657262802124023, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.8929, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 18.243854522705078, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.8811, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 33.22247314453125, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.9814, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 26.413856506347656, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.9329, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 20.56089210510254, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.8944, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 19.480737686157227, + "learning_rate": 1.326530612244898e-05, + "loss": 1.9221, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 22.788074493408203, + "learning_rate": 1.3214285714285716e-05, + "loss": 2.0445, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 20.722291946411133, + "learning_rate": 1.316326530612245e-05, + "loss": 1.9998, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 25.190189361572266, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.8076, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 23.203886032104492, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.7821, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 25.32374382019043, + "learning_rate": 1.3010204081632653e-05, + "loss": 2.0356, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 28.798864364624023, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.8202, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 24.93810272216797, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.9237, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 36.78353500366211, + "learning_rate": 1.2857142857142859e-05, + "loss": 2.0019, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 28.510663986206055, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.9268, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 38.19087219238281, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.9366, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 20.796728134155273, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.8731, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 23.036758422851562, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.8835, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 27.058195114135742, + "learning_rate": 1.260204081632653e-05, + "loss": 1.8013, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 25.390460968017578, + "learning_rate": 1.2551020408163267e-05, + "loss": 2.0623, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 27.993654251098633, + "learning_rate": 1.25e-05, + "loss": 1.6895, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 24.15807342529297, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.9799, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 24.369815826416016, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.9687, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 24.572988510131836, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.8607, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 20.491390228271484, + "learning_rate": 1.229591836734694e-05, + "loss": 2.0677, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 25.128101348876953, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.9233, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 18.843276977539062, + "learning_rate": 1.219387755102041e-05, + "loss": 1.781, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 24.99994659423828, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.962, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 20.679218292236328, + "learning_rate": 1.2091836734693878e-05, + "loss": 2.0055, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 26.00550651550293, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.9761, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 33.80900192260742, + "learning_rate": 1.1989795918367348e-05, + "loss": 2.0502, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 25.639009475708008, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.9088, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 17.48627471923828, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.9359, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 23.16074562072754, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.8647, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 25.39946174621582, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.8523, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 25.8050537109375, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.8403, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 20.019033432006836, + "learning_rate": 1.1683673469387755e-05, + "loss": 2.0023, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 26.194847106933594, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.9429, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 21.064212799072266, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.8302, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 21.876129150390625, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.8881, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 33.61103439331055, + "learning_rate": 1.1479591836734697e-05, + "loss": 2.0497, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 27.204744338989258, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.8431, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 21.605751037597656, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.9149, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 30.307472229003906, + "learning_rate": 1.1326530612244899e-05, + "loss": 2.0313, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 23.69244384765625, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.8676, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 25.619901657104492, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.7905, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 28.0296573638916, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.8567, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 36.4359130859375, + "learning_rate": 1.1122448979591838e-05, + "loss": 2.115, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 26.91726303100586, + "learning_rate": 1.1071428571428572e-05, + "loss": 2.0642, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 23.085880279541016, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.9109, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 27.870641708374023, + "learning_rate": 1.096938775510204e-05, + "loss": 1.8999, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 32.0672607421875, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.904, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 28.879365921020508, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.7159, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 27.592771530151367, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.8561, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 27.412763595581055, + "learning_rate": 1.076530612244898e-05, + "loss": 1.9282, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 30.12356185913086, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.8726, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 39.9027214050293, + "learning_rate": 1.066326530612245e-05, + "loss": 1.7551, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 30.483945846557617, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.7643, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 26.00415802001953, + "learning_rate": 1.0561224489795918e-05, + "loss": 2.0552, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 23.03282356262207, + "learning_rate": 1.0510204081632654e-05, + "loss": 2.0052, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 33.653221130371094, + "learning_rate": 1.045918367346939e-05, + "loss": 1.7367, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 39.59351348876953, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.9726, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.77714920043945, + "learning_rate": 1.0357142857142859e-05, + "loss": 2.0906, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 33.194549560546875, + "learning_rate": 1.0306122448979591e-05, + "loss": 2.0742, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 25.10793685913086, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.9204, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 40.048404693603516, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.7775, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 26.085933685302734, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.9459, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 18.375, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.9536, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.8626214265823364, + "eval_runtime": 6.6508, + "eval_samples_per_second": 106.002, + "eval_steps_per_second": 53.076, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 33.858341217041016, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.8191, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 22.895992279052734, + "learning_rate": 1e-05, + "loss": 2.0596, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 30.55072593688965, + "learning_rate": 9.948979591836737e-06, + "loss": 1.8904, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 26.542705535888672, + "learning_rate": 9.89795918367347e-06, + "loss": 1.9683, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 39.81034851074219, + "learning_rate": 9.846938775510205e-06, + "loss": 1.9726, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 22.0065860748291, + "learning_rate": 9.795918367346939e-06, + "loss": 2.0575, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 19.012041091918945, + "learning_rate": 9.744897959183674e-06, + "loss": 1.8431, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 39.699974060058594, + "learning_rate": 9.693877551020408e-06, + "loss": 1.8231, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 21.391319274902344, + "learning_rate": 9.642857142857144e-06, + "loss": 1.7939, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 25.8063907623291, + "learning_rate": 9.591836734693878e-06, + "loss": 2.0366, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 20.598569869995117, + "learning_rate": 9.540816326530612e-06, + "loss": 1.8323, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 29.391401290893555, + "learning_rate": 9.489795918367348e-06, + "loss": 2.0052, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 24.39499855041504, + "learning_rate": 9.438775510204082e-06, + "loss": 1.8461, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 24.16887092590332, + "learning_rate": 9.387755102040818e-06, + "loss": 1.9404, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 24.577871322631836, + "learning_rate": 9.336734693877552e-06, + "loss": 1.9202, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 26.117361068725586, + "learning_rate": 9.285714285714288e-06, + "loss": 1.921, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 22.586837768554688, + "learning_rate": 9.234693877551022e-06, + "loss": 1.9692, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 18.438722610473633, + "learning_rate": 9.183673469387756e-06, + "loss": 1.9496, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 22.94545555114746, + "learning_rate": 9.13265306122449e-06, + "loss": 1.991, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 28.664562225341797, + "learning_rate": 9.081632653061225e-06, + "loss": 1.8352, + "step": 220 + }, + { + "epoch": 0.5573770491803278, + "grad_norm": 25.63576316833496, + "learning_rate": 9.03061224489796e-06, + "loss": 1.9399, + "step": 221 + }, + { + "epoch": 0.5598991172761665, + "grad_norm": 21.650251388549805, + "learning_rate": 8.979591836734695e-06, + "loss": 1.9565, + "step": 222 + }, + { + "epoch": 0.562421185372005, + "grad_norm": 29.605735778808594, + "learning_rate": 8.92857142857143e-06, + "loss": 1.729, + "step": 223 + }, + { + "epoch": 0.5649432534678437, + "grad_norm": 23.98230743408203, + "learning_rate": 8.877551020408163e-06, + "loss": 1.9399, + "step": 224 + }, + { + "epoch": 0.5674653215636822, + "grad_norm": 20.37510108947754, + "learning_rate": 8.826530612244899e-06, + "loss": 1.7322, + "step": 225 + }, + { + "epoch": 0.5699873896595208, + "grad_norm": 25.876188278198242, + "learning_rate": 8.775510204081633e-06, + "loss": 1.9444, + "step": 226 + }, + { + "epoch": 0.5725094577553594, + "grad_norm": 32.07249069213867, + "learning_rate": 8.724489795918369e-06, + "loss": 1.9364, + "step": 227 + }, + { + "epoch": 0.575031525851198, + "grad_norm": 28.014524459838867, + "learning_rate": 8.673469387755103e-06, + "loss": 1.757, + "step": 228 + }, + { + "epoch": 0.5775535939470365, + "grad_norm": 30.82647132873535, + "learning_rate": 8.622448979591837e-06, + "loss": 1.9067, + "step": 229 + }, + { + "epoch": 0.5800756620428752, + "grad_norm": 30.651660919189453, + "learning_rate": 8.571428571428571e-06, + "loss": 1.9906, + "step": 230 + }, + { + "epoch": 0.5825977301387137, + "grad_norm": 25.239904403686523, + "learning_rate": 8.520408163265307e-06, + "loss": 1.8914, + "step": 231 + }, + { + "epoch": 0.5851197982345523, + "grad_norm": 21.33747673034668, + "learning_rate": 8.469387755102042e-06, + "loss": 1.9999, + "step": 232 + }, + { + "epoch": 0.587641866330391, + "grad_norm": 25.255064010620117, + "learning_rate": 8.418367346938776e-06, + "loss": 1.8941, + "step": 233 + }, + { + "epoch": 0.5901639344262295, + "grad_norm": 24.443973541259766, + "learning_rate": 8.36734693877551e-06, + "loss": 1.7679, + "step": 234 + }, + { + "epoch": 0.592686002522068, + "grad_norm": 25.473894119262695, + "learning_rate": 8.316326530612246e-06, + "loss": 1.7876, + "step": 235 + }, + { + "epoch": 0.5952080706179067, + "grad_norm": 26.28467559814453, + "learning_rate": 8.26530612244898e-06, + "loss": 1.6761, + "step": 236 + }, + { + "epoch": 0.5977301387137453, + "grad_norm": 24.488052368164062, + "learning_rate": 8.214285714285714e-06, + "loss": 2.0022, + "step": 237 + }, + { + "epoch": 0.6002522068095839, + "grad_norm": 30.074064254760742, + "learning_rate": 8.16326530612245e-06, + "loss": 1.7747, + "step": 238 + }, + { + "epoch": 0.6027742749054225, + "grad_norm": 23.73440170288086, + "learning_rate": 8.112244897959184e-06, + "loss": 1.8468, + "step": 239 + }, + { + "epoch": 0.605296343001261, + "grad_norm": 22.338869094848633, + "learning_rate": 8.06122448979592e-06, + "loss": 1.8611, + "step": 240 + }, + { + "epoch": 0.6078184110970997, + "grad_norm": 24.844266891479492, + "learning_rate": 8.010204081632654e-06, + "loss": 1.9285, + "step": 241 + }, + { + "epoch": 0.6103404791929382, + "grad_norm": 29.65668487548828, + "learning_rate": 7.959183673469388e-06, + "loss": 1.935, + "step": 242 + }, + { + "epoch": 0.6128625472887768, + "grad_norm": 26.01723289489746, + "learning_rate": 7.908163265306124e-06, + "loss": 1.7587, + "step": 243 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 27.04817771911621, + "learning_rate": 7.857142857142858e-06, + "loss": 1.8878, + "step": 244 + }, + { + "epoch": 0.617906683480454, + "grad_norm": 36.23786163330078, + "learning_rate": 7.806122448979593e-06, + "loss": 1.992, + "step": 245 + }, + { + "epoch": 0.6204287515762925, + "grad_norm": 19.283294677734375, + "learning_rate": 7.755102040816327e-06, + "loss": 1.8066, + "step": 246 + }, + { + "epoch": 0.6229508196721312, + "grad_norm": 24.24143409729004, + "learning_rate": 7.704081632653061e-06, + "loss": 1.8899, + "step": 247 + }, + { + "epoch": 0.6254728877679697, + "grad_norm": 25.59832763671875, + "learning_rate": 7.653061224489796e-06, + "loss": 1.9601, + "step": 248 + }, + { + "epoch": 0.6279949558638083, + "grad_norm": 27.195640563964844, + "learning_rate": 7.602040816326531e-06, + "loss": 1.9561, + "step": 249 + }, + { + "epoch": 0.6305170239596469, + "grad_norm": 27.854570388793945, + "learning_rate": 7.551020408163265e-06, + "loss": 1.8781, + "step": 250 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3648915916455936.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-250/training_args.bin b/checkpoints/checkpoint-250/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..866f8d7e39db21daf30a11de7c14a3ccb8f2e9aa --- /dev/null +++ b/checkpoints/checkpoint-250/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:591380c2089627b1aa728bfd52abdb50afeaafc7a7f48353dede4e443fc370d0 +size 5969 diff --git a/checkpoints/checkpoint-260/README.md b/checkpoints/checkpoint-260/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b974a935220f493e52a3da346206a652479b4735 --- /dev/null +++ b/checkpoints/checkpoint-260/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.2-1B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-260/adapter_config.json b/checkpoints/checkpoint-260/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a0d31afa2216e2d6d2237fc15eef0c8c03ca674 --- /dev/null +++ b/checkpoints/checkpoint-260/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.2-1B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-260/adapter_model.safetensors b/checkpoints/checkpoint-260/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f31476a59a3e77a8bf1565264ac2f1049f3ef1da --- /dev/null +++ b/checkpoints/checkpoint-260/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65d7732e8ddbaff3075e9641fb32ba08d3b8d9f8176ebe2a242d8bdafc8d8240 +size 41248 diff --git a/checkpoints/checkpoint-260/chat_template.jinja b/checkpoints/checkpoint-260/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..1bad6a0f648dccdbec523ca79ba90fbcfc806af0 --- /dev/null +++ b/checkpoints/checkpoint-260/chat_template.jinja @@ -0,0 +1,93 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- if strftime_now is defined %} + {%- set date_string = strftime_now("%d %b %Y") %} + {%- else %} + {%- set date_string = "26 Jul 2024" %} + {%- endif %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {{- "<|eot_id|>" }} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-260/optimizer.pt b/checkpoints/checkpoint-260/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..02373ac0608099d3ea49704a5f0bdbd4ef476a99 --- /dev/null +++ b/checkpoints/checkpoint-260/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e5eeeed2299e8401b0ca5af14370b14f7c6172a7ac33cb36b84696c423367c3 +size 38953 diff --git a/checkpoints/checkpoint-260/rng_state.pth b/checkpoints/checkpoint-260/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..27efe79200462dc7ec248017016a4968efc39ca1 --- /dev/null +++ b/checkpoints/checkpoint-260/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4684d51fc75f779aa7923c633e4c75a716ac73280dc8e9335cb1439e364bde19 +size 14581 diff --git a/checkpoints/checkpoint-260/scheduler.pt b/checkpoints/checkpoint-260/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..776542df0e4a69c6cdf6ff3ffd6e58fdd9a6e4fc --- /dev/null +++ b/checkpoints/checkpoint-260/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7977c50bbd96698bc74fb946fa8c04a9b3416f87cf2b5c362d393edfa17af4d8 +size 1465 diff --git a/checkpoints/checkpoint-260/special_tokens_map.json b/checkpoints/checkpoint-260/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-260/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-260/tokenizer.json b/checkpoints/checkpoint-260/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-260/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-260/tokenizer_config.json b/checkpoints/checkpoint-260/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-260/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-260/trainer_state.json b/checkpoints/checkpoint-260/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3ab3c89d3adc827c7d833b040c06233879e3d456 --- /dev/null +++ b/checkpoints/checkpoint-260/trainer_state.json @@ -0,0 +1,1870 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.6557377049180327, + "eval_steps": 100, + "global_step": 260, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 25.350475311279297, + "learning_rate": 0.0, + "loss": 2.5568, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 24.538068771362305, + "learning_rate": 4.000000000000001e-06, + "loss": 2.7748, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 23.780784606933594, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5911, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 24.780380249023438, + "learning_rate": 1.2e-05, + "loss": 2.8427, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 22.699949264526367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.709, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 22.106008529663086, + "learning_rate": 2e-05, + "loss": 2.5854, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 22.497045516967773, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.5427, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 27.103275299072266, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.6599, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 21.081985473632812, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.5145, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 25.964981079101562, + "learning_rate": 1.979591836734694e-05, + "loss": 2.4247, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 25.353195190429688, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.5092, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 18.94191551208496, + "learning_rate": 1.969387755102041e-05, + "loss": 2.4335, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 23.60140037536621, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.544, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 24.298965454101562, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.4987, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.745506286621094, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.4985, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 22.54330062866211, + "learning_rate": 1.948979591836735e-05, + "loss": 2.6892, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 21.46229362487793, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.3998, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 20.54530143737793, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.4244, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 18.8839111328125, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.3911, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.924652099609375, + "learning_rate": 1.928571428571429e-05, + "loss": 2.3588, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.996627807617188, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.3727, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 18.584613800048828, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2974, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 14.309200286865234, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.4843, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.074164390563965, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.4043, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 13.610542297363281, + "learning_rate": 1.903061224489796e-05, + "loss": 2.3762, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 15.666613578796387, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.3249, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 14.475164413452148, + "learning_rate": 1.892857142857143e-05, + "loss": 2.3317, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 16.231687545776367, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.5064, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 16.8968563079834, + "learning_rate": 1.88265306122449e-05, + "loss": 2.3932, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 17.74305534362793, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.3329, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 16.41620445251465, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.3982, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 17.965959548950195, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.4227, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 19.92589569091797, + "learning_rate": 1.862244897959184e-05, + "loss": 2.5255, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 20.62932586669922, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.1816, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 18.360614776611328, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.2827, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 19.199546813964844, + "learning_rate": 1.8469387755102043e-05, + "loss": 2.1498, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 22.727521896362305, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.3811, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 19.80649757385254, + "learning_rate": 1.836734693877551e-05, + "loss": 2.2342, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.24563217163086, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.2287, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 25.384042739868164, + "learning_rate": 1.826530612244898e-05, + "loss": 2.1259, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 23.417089462280273, + "learning_rate": 1.8214285714285715e-05, + "loss": 2.0858, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 27.639497756958008, + "learning_rate": 1.816326530612245e-05, + "loss": 2.2243, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 27.390850067138672, + "learning_rate": 1.8112244897959187e-05, + "loss": 2.1314, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 27.956937789916992, + "learning_rate": 1.806122448979592e-05, + "loss": 2.1755, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 32.09632873535156, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.2365, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 33.84647750854492, + "learning_rate": 1.795918367346939e-05, + "loss": 2.1671, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 32.027130126953125, + "learning_rate": 1.7908163265306123e-05, + "loss": 2.09, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 35.423587799072266, + "learning_rate": 1.785714285714286e-05, + "loss": 2.2479, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 31.041240692138672, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9687, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 28.790103912353516, + "learning_rate": 1.7755102040816327e-05, + "loss": 2.1428, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.089313507080078, + "learning_rate": 1.7704081632653062e-05, + "loss": 2.0673, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 26.493867874145508, + "learning_rate": 1.7653061224489798e-05, + "loss": 2.0814, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 19.993173599243164, + "learning_rate": 1.760204081632653e-05, + "loss": 2.005, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 21.89765167236328, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.2262, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 23.22844123840332, + "learning_rate": 1.7500000000000002e-05, + "loss": 2.0208, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 15.864526748657227, + "learning_rate": 1.7448979591836738e-05, + "loss": 2.0153, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 21.451187133789062, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.1386, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 18.089811325073242, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.9517, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 24.029157638549805, + "learning_rate": 1.729591836734694e-05, + "loss": 1.9719, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 18.722776412963867, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0623, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 20.211933135986328, + "learning_rate": 1.719387755102041e-05, + "loss": 2.0081, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 17.61188507080078, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.8484, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 20.118955612182617, + "learning_rate": 1.7091836734693878e-05, + "loss": 2.0799, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 17.271841049194336, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.9832, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 19.521392822265625, + "learning_rate": 1.698979591836735e-05, + "loss": 1.9129, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 22.660900115966797, + "learning_rate": 1.6938775510204085e-05, + "loss": 2.118, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 17.332427978515625, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.9632, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 22.42765998840332, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.954, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 23.6208553314209, + "learning_rate": 1.678571428571429e-05, + "loss": 1.9917, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 19.78505516052246, + "learning_rate": 1.673469387755102e-05, + "loss": 1.7964, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 19.453041076660156, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.9587, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 24.731407165527344, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.9945, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 20.977611541748047, + "learning_rate": 1.6581632653061225e-05, + "loss": 2.0617, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 22.959585189819336, + "learning_rate": 1.653061224489796e-05, + "loss": 1.98, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 21.952653884887695, + "learning_rate": 1.6479591836734696e-05, + "loss": 2.1094, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 22.320383071899414, + "learning_rate": 1.642857142857143e-05, + "loss": 1.8418, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 24.375411987304688, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.8428, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 19.64323616027832, + "learning_rate": 1.63265306122449e-05, + "loss": 1.9194, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 22.459064483642578, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.6649, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 36.789764404296875, + "learning_rate": 1.6224489795918368e-05, + "loss": 2.0131, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 22.109119415283203, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.9603, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 19.196834564208984, + "learning_rate": 1.612244897959184e-05, + "loss": 2.0538, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 26.870800018310547, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.9168, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 35.190696716308594, + "learning_rate": 1.6020408163265308e-05, + "loss": 2.0149, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 19.963472366333008, + "learning_rate": 1.596938775510204e-05, + "loss": 1.7871, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 20.292407989501953, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.944, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 20.55329132080078, + "learning_rate": 1.586734693877551e-05, + "loss": 2.0175, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 17.27350616455078, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.9308, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 22.471134185791016, + "learning_rate": 1.576530612244898e-05, + "loss": 1.9221, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 25.098316192626953, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.9359, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 25.125213623046875, + "learning_rate": 1.566326530612245e-05, + "loss": 2.0087, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 20.038599014282227, + "learning_rate": 1.5612244897959187e-05, + "loss": 2.0939, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 19.016841888427734, + "learning_rate": 1.556122448979592e-05, + "loss": 2.0183, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 21.97820472717285, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.8239, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 25.578901290893555, + "learning_rate": 1.545918367346939e-05, + "loss": 1.9388, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 23.74614143371582, + "learning_rate": 1.5408163265306123e-05, + "loss": 2.0492, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 22.203304290771484, + "learning_rate": 1.535714285714286e-05, + "loss": 1.941, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 21.39324188232422, + "learning_rate": 1.530612244897959e-05, + "loss": 1.9042, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 18.99315643310547, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.88, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 24.22341537475586, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.8147, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.8966256380081177, + "eval_runtime": 6.9787, + "eval_samples_per_second": 101.022, + "eval_steps_per_second": 50.583, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 18.296152114868164, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.8605, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 26.404766082763672, + "learning_rate": 1.510204081632653e-05, + "loss": 2.1195, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 19.187122344970703, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.9284, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 20.79934310913086, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.725, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 23.833288192749023, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.9215, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 22.301727294921875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.8728, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 23.685596466064453, + "learning_rate": 1.4846938775510204e-05, + "loss": 2.0482, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 18.969186782836914, + "learning_rate": 1.479591836734694e-05, + "loss": 1.8724, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 23.994483947753906, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.9542, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 16.84621238708496, + "learning_rate": 1.469387755102041e-05, + "loss": 1.9703, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 23.411087036132812, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.9836, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 29.55487632751465, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.9124, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 32.28921127319336, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.8566, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 24.007558822631836, + "learning_rate": 1.448979591836735e-05, + "loss": 1.8296, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 26.753524780273438, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.7181, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 22.49270248413086, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.8741, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 28.006656646728516, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.9151, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 17.606775283813477, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.9654, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 29.94802474975586, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.8849, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 28.27743148803711, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.8006, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 19.11652183532715, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.8539, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 24.255807876586914, + "learning_rate": 1.4081632653061225e-05, + "loss": 2.0162, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 22.508352279663086, + "learning_rate": 1.403061224489796e-05, + "loss": 1.858, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 27.028772354125977, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.8175, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 22.697704315185547, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.9789, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 31.604068756103516, + "learning_rate": 1.3877551020408165e-05, + "loss": 2.0039, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 27.71053695678711, + "learning_rate": 1.38265306122449e-05, + "loss": 1.9867, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 17.37586784362793, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.6931, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 20.28536605834961, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.9199, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 29.4377384185791, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.9322, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 17.703046798706055, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.8842, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 30.14008140563965, + "learning_rate": 1.3571428571428574e-05, + "loss": 2.1228, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 29.657262802124023, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.8929, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 18.243854522705078, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.8811, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 33.22247314453125, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.9814, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 26.413856506347656, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.9329, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 20.56089210510254, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.8944, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 19.480737686157227, + "learning_rate": 1.326530612244898e-05, + "loss": 1.9221, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 22.788074493408203, + "learning_rate": 1.3214285714285716e-05, + "loss": 2.0445, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 20.722291946411133, + "learning_rate": 1.316326530612245e-05, + "loss": 1.9998, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 25.190189361572266, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.8076, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 23.203886032104492, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.7821, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 25.32374382019043, + "learning_rate": 1.3010204081632653e-05, + "loss": 2.0356, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 28.798864364624023, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.8202, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 24.93810272216797, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.9237, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 36.78353500366211, + "learning_rate": 1.2857142857142859e-05, + "loss": 2.0019, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 28.510663986206055, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.9268, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 38.19087219238281, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.9366, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 20.796728134155273, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.8731, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 23.036758422851562, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.8835, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 27.058195114135742, + "learning_rate": 1.260204081632653e-05, + "loss": 1.8013, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 25.390460968017578, + "learning_rate": 1.2551020408163267e-05, + "loss": 2.0623, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 27.993654251098633, + "learning_rate": 1.25e-05, + "loss": 1.6895, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 24.15807342529297, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.9799, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 24.369815826416016, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.9687, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 24.572988510131836, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.8607, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 20.491390228271484, + "learning_rate": 1.229591836734694e-05, + "loss": 2.0677, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 25.128101348876953, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.9233, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 18.843276977539062, + "learning_rate": 1.219387755102041e-05, + "loss": 1.781, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 24.99994659423828, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.962, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 20.679218292236328, + "learning_rate": 1.2091836734693878e-05, + "loss": 2.0055, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 26.00550651550293, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.9761, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 33.80900192260742, + "learning_rate": 1.1989795918367348e-05, + "loss": 2.0502, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 25.639009475708008, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.9088, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 17.48627471923828, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.9359, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 23.16074562072754, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.8647, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 25.39946174621582, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.8523, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 25.8050537109375, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.8403, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 20.019033432006836, + "learning_rate": 1.1683673469387755e-05, + "loss": 2.0023, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 26.194847106933594, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.9429, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 21.064212799072266, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.8302, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 21.876129150390625, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.8881, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 33.61103439331055, + "learning_rate": 1.1479591836734697e-05, + "loss": 2.0497, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 27.204744338989258, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.8431, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 21.605751037597656, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.9149, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 30.307472229003906, + "learning_rate": 1.1326530612244899e-05, + "loss": 2.0313, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 23.69244384765625, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.8676, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 25.619901657104492, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.7905, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 28.0296573638916, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.8567, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 36.4359130859375, + "learning_rate": 1.1122448979591838e-05, + "loss": 2.115, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 26.91726303100586, + "learning_rate": 1.1071428571428572e-05, + "loss": 2.0642, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 23.085880279541016, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.9109, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 27.870641708374023, + "learning_rate": 1.096938775510204e-05, + "loss": 1.8999, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 32.0672607421875, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.904, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 28.879365921020508, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.7159, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 27.592771530151367, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.8561, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 27.412763595581055, + "learning_rate": 1.076530612244898e-05, + "loss": 1.9282, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 30.12356185913086, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.8726, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 39.9027214050293, + "learning_rate": 1.066326530612245e-05, + "loss": 1.7551, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 30.483945846557617, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.7643, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 26.00415802001953, + "learning_rate": 1.0561224489795918e-05, + "loss": 2.0552, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 23.03282356262207, + "learning_rate": 1.0510204081632654e-05, + "loss": 2.0052, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 33.653221130371094, + "learning_rate": 1.045918367346939e-05, + "loss": 1.7367, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 39.59351348876953, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.9726, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.77714920043945, + "learning_rate": 1.0357142857142859e-05, + "loss": 2.0906, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 33.194549560546875, + "learning_rate": 1.0306122448979591e-05, + "loss": 2.0742, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 25.10793685913086, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.9204, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 40.048404693603516, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.7775, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 26.085933685302734, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.9459, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 18.375, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.9536, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.8626214265823364, + "eval_runtime": 6.6508, + "eval_samples_per_second": 106.002, + "eval_steps_per_second": 53.076, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 33.858341217041016, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.8191, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 22.895992279052734, + "learning_rate": 1e-05, + "loss": 2.0596, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 30.55072593688965, + "learning_rate": 9.948979591836737e-06, + "loss": 1.8904, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 26.542705535888672, + "learning_rate": 9.89795918367347e-06, + "loss": 1.9683, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 39.81034851074219, + "learning_rate": 9.846938775510205e-06, + "loss": 1.9726, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 22.0065860748291, + "learning_rate": 9.795918367346939e-06, + "loss": 2.0575, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 19.012041091918945, + "learning_rate": 9.744897959183674e-06, + "loss": 1.8431, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 39.699974060058594, + "learning_rate": 9.693877551020408e-06, + "loss": 1.8231, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 21.391319274902344, + "learning_rate": 9.642857142857144e-06, + "loss": 1.7939, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 25.8063907623291, + "learning_rate": 9.591836734693878e-06, + "loss": 2.0366, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 20.598569869995117, + "learning_rate": 9.540816326530612e-06, + "loss": 1.8323, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 29.391401290893555, + "learning_rate": 9.489795918367348e-06, + "loss": 2.0052, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 24.39499855041504, + "learning_rate": 9.438775510204082e-06, + "loss": 1.8461, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 24.16887092590332, + "learning_rate": 9.387755102040818e-06, + "loss": 1.9404, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 24.577871322631836, + "learning_rate": 9.336734693877552e-06, + "loss": 1.9202, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 26.117361068725586, + "learning_rate": 9.285714285714288e-06, + "loss": 1.921, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 22.586837768554688, + "learning_rate": 9.234693877551022e-06, + "loss": 1.9692, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 18.438722610473633, + "learning_rate": 9.183673469387756e-06, + "loss": 1.9496, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 22.94545555114746, + "learning_rate": 9.13265306122449e-06, + "loss": 1.991, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 28.664562225341797, + "learning_rate": 9.081632653061225e-06, + "loss": 1.8352, + "step": 220 + }, + { + "epoch": 0.5573770491803278, + "grad_norm": 25.63576316833496, + "learning_rate": 9.03061224489796e-06, + "loss": 1.9399, + "step": 221 + }, + { + "epoch": 0.5598991172761665, + "grad_norm": 21.650251388549805, + "learning_rate": 8.979591836734695e-06, + "loss": 1.9565, + "step": 222 + }, + { + "epoch": 0.562421185372005, + "grad_norm": 29.605735778808594, + "learning_rate": 8.92857142857143e-06, + "loss": 1.729, + "step": 223 + }, + { + "epoch": 0.5649432534678437, + "grad_norm": 23.98230743408203, + "learning_rate": 8.877551020408163e-06, + "loss": 1.9399, + "step": 224 + }, + { + "epoch": 0.5674653215636822, + "grad_norm": 20.37510108947754, + "learning_rate": 8.826530612244899e-06, + "loss": 1.7322, + "step": 225 + }, + { + "epoch": 0.5699873896595208, + "grad_norm": 25.876188278198242, + "learning_rate": 8.775510204081633e-06, + "loss": 1.9444, + "step": 226 + }, + { + "epoch": 0.5725094577553594, + "grad_norm": 32.07249069213867, + "learning_rate": 8.724489795918369e-06, + "loss": 1.9364, + "step": 227 + }, + { + "epoch": 0.575031525851198, + "grad_norm": 28.014524459838867, + "learning_rate": 8.673469387755103e-06, + "loss": 1.757, + "step": 228 + }, + { + "epoch": 0.5775535939470365, + "grad_norm": 30.82647132873535, + "learning_rate": 8.622448979591837e-06, + "loss": 1.9067, + "step": 229 + }, + { + "epoch": 0.5800756620428752, + "grad_norm": 30.651660919189453, + "learning_rate": 8.571428571428571e-06, + "loss": 1.9906, + "step": 230 + }, + { + "epoch": 0.5825977301387137, + "grad_norm": 25.239904403686523, + "learning_rate": 8.520408163265307e-06, + "loss": 1.8914, + "step": 231 + }, + { + "epoch": 0.5851197982345523, + "grad_norm": 21.33747673034668, + "learning_rate": 8.469387755102042e-06, + "loss": 1.9999, + "step": 232 + }, + { + "epoch": 0.587641866330391, + "grad_norm": 25.255064010620117, + "learning_rate": 8.418367346938776e-06, + "loss": 1.8941, + "step": 233 + }, + { + "epoch": 0.5901639344262295, + "grad_norm": 24.443973541259766, + "learning_rate": 8.36734693877551e-06, + "loss": 1.7679, + "step": 234 + }, + { + "epoch": 0.592686002522068, + "grad_norm": 25.473894119262695, + "learning_rate": 8.316326530612246e-06, + "loss": 1.7876, + "step": 235 + }, + { + "epoch": 0.5952080706179067, + "grad_norm": 26.28467559814453, + "learning_rate": 8.26530612244898e-06, + "loss": 1.6761, + "step": 236 + }, + { + "epoch": 0.5977301387137453, + "grad_norm": 24.488052368164062, + "learning_rate": 8.214285714285714e-06, + "loss": 2.0022, + "step": 237 + }, + { + "epoch": 0.6002522068095839, + "grad_norm": 30.074064254760742, + "learning_rate": 8.16326530612245e-06, + "loss": 1.7747, + "step": 238 + }, + { + "epoch": 0.6027742749054225, + "grad_norm": 23.73440170288086, + "learning_rate": 8.112244897959184e-06, + "loss": 1.8468, + "step": 239 + }, + { + "epoch": 0.605296343001261, + "grad_norm": 22.338869094848633, + "learning_rate": 8.06122448979592e-06, + "loss": 1.8611, + "step": 240 + }, + { + "epoch": 0.6078184110970997, + "grad_norm": 24.844266891479492, + "learning_rate": 8.010204081632654e-06, + "loss": 1.9285, + "step": 241 + }, + { + "epoch": 0.6103404791929382, + "grad_norm": 29.65668487548828, + "learning_rate": 7.959183673469388e-06, + "loss": 1.935, + "step": 242 + }, + { + "epoch": 0.6128625472887768, + "grad_norm": 26.01723289489746, + "learning_rate": 7.908163265306124e-06, + "loss": 1.7587, + "step": 243 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 27.04817771911621, + "learning_rate": 7.857142857142858e-06, + "loss": 1.8878, + "step": 244 + }, + { + "epoch": 0.617906683480454, + "grad_norm": 36.23786163330078, + "learning_rate": 7.806122448979593e-06, + "loss": 1.992, + "step": 245 + }, + { + "epoch": 0.6204287515762925, + "grad_norm": 19.283294677734375, + "learning_rate": 7.755102040816327e-06, + "loss": 1.8066, + "step": 246 + }, + { + "epoch": 0.6229508196721312, + "grad_norm": 24.24143409729004, + "learning_rate": 7.704081632653061e-06, + "loss": 1.8899, + "step": 247 + }, + { + "epoch": 0.6254728877679697, + "grad_norm": 25.59832763671875, + "learning_rate": 7.653061224489796e-06, + "loss": 1.9601, + "step": 248 + }, + { + "epoch": 0.6279949558638083, + "grad_norm": 27.195640563964844, + "learning_rate": 7.602040816326531e-06, + "loss": 1.9561, + "step": 249 + }, + { + "epoch": 0.6305170239596469, + "grad_norm": 27.854570388793945, + "learning_rate": 7.551020408163265e-06, + "loss": 1.8781, + "step": 250 + }, + { + "epoch": 0.6330390920554855, + "grad_norm": 25.715761184692383, + "learning_rate": 7.500000000000001e-06, + "loss": 1.8542, + "step": 251 + }, + { + "epoch": 0.6355611601513241, + "grad_norm": 22.562984466552734, + "learning_rate": 7.448979591836736e-06, + "loss": 1.7681, + "step": 252 + }, + { + "epoch": 0.6380832282471627, + "grad_norm": 20.540348052978516, + "learning_rate": 7.39795918367347e-06, + "loss": 1.9617, + "step": 253 + }, + { + "epoch": 0.6406052963430012, + "grad_norm": 24.610937118530273, + "learning_rate": 7.346938775510205e-06, + "loss": 1.9694, + "step": 254 + }, + { + "epoch": 0.6431273644388399, + "grad_norm": 27.93538475036621, + "learning_rate": 7.295918367346939e-06, + "loss": 1.8858, + "step": 255 + }, + { + "epoch": 0.6456494325346784, + "grad_norm": 31.466445922851562, + "learning_rate": 7.244897959183675e-06, + "loss": 1.8252, + "step": 256 + }, + { + "epoch": 0.648171500630517, + "grad_norm": 26.276226043701172, + "learning_rate": 7.193877551020409e-06, + "loss": 1.8865, + "step": 257 + }, + { + "epoch": 0.6506935687263556, + "grad_norm": 22.52095603942871, + "learning_rate": 7.1428571428571436e-06, + "loss": 1.7649, + "step": 258 + }, + { + "epoch": 0.6532156368221942, + "grad_norm": 20.15144157409668, + "learning_rate": 7.091836734693878e-06, + "loss": 2.0158, + "step": 259 + }, + { + "epoch": 0.6557377049180327, + "grad_norm": 26.405349731445312, + "learning_rate": 7.0408163265306125e-06, + "loss": 1.8932, + "step": 260 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3795613397581824.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-260/training_args.bin b/checkpoints/checkpoint-260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..866f8d7e39db21daf30a11de7c14a3ccb8f2e9aa --- /dev/null +++ b/checkpoints/checkpoint-260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:591380c2089627b1aa728bfd52abdb50afeaafc7a7f48353dede4e443fc370d0 +size 5969 diff --git a/checkpoints/checkpoint-270/README.md b/checkpoints/checkpoint-270/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b974a935220f493e52a3da346206a652479b4735 --- /dev/null +++ b/checkpoints/checkpoint-270/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.2-1B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-270/adapter_config.json b/checkpoints/checkpoint-270/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a0d31afa2216e2d6d2237fc15eef0c8c03ca674 --- /dev/null +++ b/checkpoints/checkpoint-270/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.2-1B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-270/adapter_model.safetensors b/checkpoints/checkpoint-270/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1eb69ef75041ccb37233894cc407acb10072929a --- /dev/null +++ b/checkpoints/checkpoint-270/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fe4d16177bcc9854e70cc123ce2d97cc3538b3eaa96a3f21f356688c82cb11e +size 41248 diff --git a/checkpoints/checkpoint-270/chat_template.jinja b/checkpoints/checkpoint-270/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..1bad6a0f648dccdbec523ca79ba90fbcfc806af0 --- /dev/null +++ b/checkpoints/checkpoint-270/chat_template.jinja @@ -0,0 +1,93 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- if strftime_now is defined %} + {%- set date_string = strftime_now("%d %b %Y") %} + {%- else %} + {%- set date_string = "26 Jul 2024" %} + {%- endif %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {{- "<|eot_id|>" }} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-270/optimizer.pt b/checkpoints/checkpoint-270/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..da9c267d4ee6655091442915f791dda4dacfa058 --- /dev/null +++ b/checkpoints/checkpoint-270/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:932f6a3bbdc2496dc5cfde29c389d4a012ef4b3629b60c36bfc3c1bfda5b1b04 +size 38953 diff --git a/checkpoints/checkpoint-270/rng_state.pth b/checkpoints/checkpoint-270/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..27efe79200462dc7ec248017016a4968efc39ca1 --- /dev/null +++ b/checkpoints/checkpoint-270/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4684d51fc75f779aa7923c633e4c75a716ac73280dc8e9335cb1439e364bde19 +size 14581 diff --git a/checkpoints/checkpoint-270/scheduler.pt b/checkpoints/checkpoint-270/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a5497593929bd6d243a69caf38b9c42a0f945eac --- /dev/null +++ b/checkpoints/checkpoint-270/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11d379d79076dfe0d603f997647471f73ed75425774216fdf5f3f9e9ce71ee87 +size 1465 diff --git a/checkpoints/checkpoint-270/special_tokens_map.json b/checkpoints/checkpoint-270/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-270/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-270/tokenizer.json b/checkpoints/checkpoint-270/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-270/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-270/tokenizer_config.json b/checkpoints/checkpoint-270/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-270/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-270/trainer_state.json b/checkpoints/checkpoint-270/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..2bc196b34bd00df4c0726bb873c765fae9a2a0b5 --- /dev/null +++ b/checkpoints/checkpoint-270/trainer_state.json @@ -0,0 +1,1940 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.6809583858764187, + "eval_steps": 100, + "global_step": 270, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 25.350475311279297, + "learning_rate": 0.0, + "loss": 2.5568, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 24.538068771362305, + "learning_rate": 4.000000000000001e-06, + "loss": 2.7748, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 23.780784606933594, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5911, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 24.780380249023438, + "learning_rate": 1.2e-05, + "loss": 2.8427, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 22.699949264526367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.709, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 22.106008529663086, + "learning_rate": 2e-05, + "loss": 2.5854, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 22.497045516967773, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.5427, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 27.103275299072266, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.6599, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 21.081985473632812, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.5145, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 25.964981079101562, + "learning_rate": 1.979591836734694e-05, + "loss": 2.4247, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 25.353195190429688, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.5092, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 18.94191551208496, + "learning_rate": 1.969387755102041e-05, + "loss": 2.4335, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 23.60140037536621, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.544, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 24.298965454101562, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.4987, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.745506286621094, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.4985, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 22.54330062866211, + "learning_rate": 1.948979591836735e-05, + "loss": 2.6892, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 21.46229362487793, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.3998, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 20.54530143737793, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.4244, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 18.8839111328125, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.3911, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.924652099609375, + "learning_rate": 1.928571428571429e-05, + "loss": 2.3588, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.996627807617188, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.3727, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 18.584613800048828, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2974, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 14.309200286865234, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.4843, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.074164390563965, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.4043, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 13.610542297363281, + "learning_rate": 1.903061224489796e-05, + "loss": 2.3762, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 15.666613578796387, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.3249, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 14.475164413452148, + "learning_rate": 1.892857142857143e-05, + "loss": 2.3317, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 16.231687545776367, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.5064, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 16.8968563079834, + "learning_rate": 1.88265306122449e-05, + "loss": 2.3932, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 17.74305534362793, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.3329, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 16.41620445251465, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.3982, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 17.965959548950195, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.4227, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 19.92589569091797, + "learning_rate": 1.862244897959184e-05, + "loss": 2.5255, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 20.62932586669922, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.1816, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 18.360614776611328, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.2827, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 19.199546813964844, + "learning_rate": 1.8469387755102043e-05, + "loss": 2.1498, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 22.727521896362305, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.3811, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 19.80649757385254, + "learning_rate": 1.836734693877551e-05, + "loss": 2.2342, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.24563217163086, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.2287, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 25.384042739868164, + "learning_rate": 1.826530612244898e-05, + "loss": 2.1259, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 23.417089462280273, + "learning_rate": 1.8214285714285715e-05, + "loss": 2.0858, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 27.639497756958008, + "learning_rate": 1.816326530612245e-05, + "loss": 2.2243, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 27.390850067138672, + "learning_rate": 1.8112244897959187e-05, + "loss": 2.1314, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 27.956937789916992, + "learning_rate": 1.806122448979592e-05, + "loss": 2.1755, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 32.09632873535156, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.2365, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 33.84647750854492, + "learning_rate": 1.795918367346939e-05, + "loss": 2.1671, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 32.027130126953125, + "learning_rate": 1.7908163265306123e-05, + "loss": 2.09, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 35.423587799072266, + "learning_rate": 1.785714285714286e-05, + "loss": 2.2479, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 31.041240692138672, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9687, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 28.790103912353516, + "learning_rate": 1.7755102040816327e-05, + "loss": 2.1428, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.089313507080078, + "learning_rate": 1.7704081632653062e-05, + "loss": 2.0673, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 26.493867874145508, + "learning_rate": 1.7653061224489798e-05, + "loss": 2.0814, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 19.993173599243164, + "learning_rate": 1.760204081632653e-05, + "loss": 2.005, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 21.89765167236328, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.2262, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 23.22844123840332, + "learning_rate": 1.7500000000000002e-05, + "loss": 2.0208, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 15.864526748657227, + "learning_rate": 1.7448979591836738e-05, + "loss": 2.0153, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 21.451187133789062, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.1386, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 18.089811325073242, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.9517, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 24.029157638549805, + "learning_rate": 1.729591836734694e-05, + "loss": 1.9719, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 18.722776412963867, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0623, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 20.211933135986328, + "learning_rate": 1.719387755102041e-05, + "loss": 2.0081, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 17.61188507080078, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.8484, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 20.118955612182617, + "learning_rate": 1.7091836734693878e-05, + "loss": 2.0799, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 17.271841049194336, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.9832, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 19.521392822265625, + "learning_rate": 1.698979591836735e-05, + "loss": 1.9129, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 22.660900115966797, + "learning_rate": 1.6938775510204085e-05, + "loss": 2.118, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 17.332427978515625, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.9632, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 22.42765998840332, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.954, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 23.6208553314209, + "learning_rate": 1.678571428571429e-05, + "loss": 1.9917, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 19.78505516052246, + "learning_rate": 1.673469387755102e-05, + "loss": 1.7964, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 19.453041076660156, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.9587, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 24.731407165527344, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.9945, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 20.977611541748047, + "learning_rate": 1.6581632653061225e-05, + "loss": 2.0617, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 22.959585189819336, + "learning_rate": 1.653061224489796e-05, + "loss": 1.98, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 21.952653884887695, + "learning_rate": 1.6479591836734696e-05, + "loss": 2.1094, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 22.320383071899414, + "learning_rate": 1.642857142857143e-05, + "loss": 1.8418, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 24.375411987304688, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.8428, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 19.64323616027832, + "learning_rate": 1.63265306122449e-05, + "loss": 1.9194, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 22.459064483642578, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.6649, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 36.789764404296875, + "learning_rate": 1.6224489795918368e-05, + "loss": 2.0131, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 22.109119415283203, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.9603, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 19.196834564208984, + "learning_rate": 1.612244897959184e-05, + "loss": 2.0538, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 26.870800018310547, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.9168, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 35.190696716308594, + "learning_rate": 1.6020408163265308e-05, + "loss": 2.0149, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 19.963472366333008, + "learning_rate": 1.596938775510204e-05, + "loss": 1.7871, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 20.292407989501953, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.944, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 20.55329132080078, + "learning_rate": 1.586734693877551e-05, + "loss": 2.0175, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 17.27350616455078, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.9308, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 22.471134185791016, + "learning_rate": 1.576530612244898e-05, + "loss": 1.9221, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 25.098316192626953, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.9359, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 25.125213623046875, + "learning_rate": 1.566326530612245e-05, + "loss": 2.0087, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 20.038599014282227, + "learning_rate": 1.5612244897959187e-05, + "loss": 2.0939, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 19.016841888427734, + "learning_rate": 1.556122448979592e-05, + "loss": 2.0183, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 21.97820472717285, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.8239, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 25.578901290893555, + "learning_rate": 1.545918367346939e-05, + "loss": 1.9388, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 23.74614143371582, + "learning_rate": 1.5408163265306123e-05, + "loss": 2.0492, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 22.203304290771484, + "learning_rate": 1.535714285714286e-05, + "loss": 1.941, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 21.39324188232422, + "learning_rate": 1.530612244897959e-05, + "loss": 1.9042, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 18.99315643310547, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.88, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 24.22341537475586, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.8147, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.8966256380081177, + "eval_runtime": 6.9787, + "eval_samples_per_second": 101.022, + "eval_steps_per_second": 50.583, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 18.296152114868164, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.8605, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 26.404766082763672, + "learning_rate": 1.510204081632653e-05, + "loss": 2.1195, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 19.187122344970703, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.9284, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 20.79934310913086, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.725, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 23.833288192749023, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.9215, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 22.301727294921875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.8728, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 23.685596466064453, + "learning_rate": 1.4846938775510204e-05, + "loss": 2.0482, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 18.969186782836914, + "learning_rate": 1.479591836734694e-05, + "loss": 1.8724, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 23.994483947753906, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.9542, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 16.84621238708496, + "learning_rate": 1.469387755102041e-05, + "loss": 1.9703, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 23.411087036132812, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.9836, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 29.55487632751465, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.9124, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 32.28921127319336, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.8566, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 24.007558822631836, + "learning_rate": 1.448979591836735e-05, + "loss": 1.8296, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 26.753524780273438, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.7181, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 22.49270248413086, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.8741, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 28.006656646728516, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.9151, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 17.606775283813477, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.9654, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 29.94802474975586, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.8849, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 28.27743148803711, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.8006, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 19.11652183532715, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.8539, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 24.255807876586914, + "learning_rate": 1.4081632653061225e-05, + "loss": 2.0162, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 22.508352279663086, + "learning_rate": 1.403061224489796e-05, + "loss": 1.858, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 27.028772354125977, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.8175, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 22.697704315185547, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.9789, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 31.604068756103516, + "learning_rate": 1.3877551020408165e-05, + "loss": 2.0039, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 27.71053695678711, + "learning_rate": 1.38265306122449e-05, + "loss": 1.9867, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 17.37586784362793, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.6931, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 20.28536605834961, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.9199, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 29.4377384185791, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.9322, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 17.703046798706055, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.8842, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 30.14008140563965, + "learning_rate": 1.3571428571428574e-05, + "loss": 2.1228, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 29.657262802124023, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.8929, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 18.243854522705078, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.8811, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 33.22247314453125, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.9814, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 26.413856506347656, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.9329, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 20.56089210510254, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.8944, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 19.480737686157227, + "learning_rate": 1.326530612244898e-05, + "loss": 1.9221, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 22.788074493408203, + "learning_rate": 1.3214285714285716e-05, + "loss": 2.0445, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 20.722291946411133, + "learning_rate": 1.316326530612245e-05, + "loss": 1.9998, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 25.190189361572266, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.8076, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 23.203886032104492, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.7821, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 25.32374382019043, + "learning_rate": 1.3010204081632653e-05, + "loss": 2.0356, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 28.798864364624023, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.8202, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 24.93810272216797, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.9237, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 36.78353500366211, + "learning_rate": 1.2857142857142859e-05, + "loss": 2.0019, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 28.510663986206055, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.9268, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 38.19087219238281, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.9366, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 20.796728134155273, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.8731, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 23.036758422851562, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.8835, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 27.058195114135742, + "learning_rate": 1.260204081632653e-05, + "loss": 1.8013, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 25.390460968017578, + "learning_rate": 1.2551020408163267e-05, + "loss": 2.0623, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 27.993654251098633, + "learning_rate": 1.25e-05, + "loss": 1.6895, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 24.15807342529297, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.9799, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 24.369815826416016, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.9687, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 24.572988510131836, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.8607, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 20.491390228271484, + "learning_rate": 1.229591836734694e-05, + "loss": 2.0677, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 25.128101348876953, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.9233, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 18.843276977539062, + "learning_rate": 1.219387755102041e-05, + "loss": 1.781, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 24.99994659423828, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.962, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 20.679218292236328, + "learning_rate": 1.2091836734693878e-05, + "loss": 2.0055, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 26.00550651550293, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.9761, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 33.80900192260742, + "learning_rate": 1.1989795918367348e-05, + "loss": 2.0502, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 25.639009475708008, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.9088, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 17.48627471923828, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.9359, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 23.16074562072754, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.8647, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 25.39946174621582, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.8523, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 25.8050537109375, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.8403, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 20.019033432006836, + "learning_rate": 1.1683673469387755e-05, + "loss": 2.0023, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 26.194847106933594, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.9429, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 21.064212799072266, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.8302, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 21.876129150390625, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.8881, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 33.61103439331055, + "learning_rate": 1.1479591836734697e-05, + "loss": 2.0497, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 27.204744338989258, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.8431, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 21.605751037597656, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.9149, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 30.307472229003906, + "learning_rate": 1.1326530612244899e-05, + "loss": 2.0313, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 23.69244384765625, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.8676, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 25.619901657104492, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.7905, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 28.0296573638916, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.8567, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 36.4359130859375, + "learning_rate": 1.1122448979591838e-05, + "loss": 2.115, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 26.91726303100586, + "learning_rate": 1.1071428571428572e-05, + "loss": 2.0642, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 23.085880279541016, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.9109, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 27.870641708374023, + "learning_rate": 1.096938775510204e-05, + "loss": 1.8999, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 32.0672607421875, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.904, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 28.879365921020508, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.7159, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 27.592771530151367, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.8561, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 27.412763595581055, + "learning_rate": 1.076530612244898e-05, + "loss": 1.9282, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 30.12356185913086, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.8726, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 39.9027214050293, + "learning_rate": 1.066326530612245e-05, + "loss": 1.7551, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 30.483945846557617, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.7643, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 26.00415802001953, + "learning_rate": 1.0561224489795918e-05, + "loss": 2.0552, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 23.03282356262207, + "learning_rate": 1.0510204081632654e-05, + "loss": 2.0052, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 33.653221130371094, + "learning_rate": 1.045918367346939e-05, + "loss": 1.7367, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 39.59351348876953, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.9726, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.77714920043945, + "learning_rate": 1.0357142857142859e-05, + "loss": 2.0906, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 33.194549560546875, + "learning_rate": 1.0306122448979591e-05, + "loss": 2.0742, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 25.10793685913086, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.9204, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 40.048404693603516, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.7775, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 26.085933685302734, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.9459, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 18.375, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.9536, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.8626214265823364, + "eval_runtime": 6.6508, + "eval_samples_per_second": 106.002, + "eval_steps_per_second": 53.076, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 33.858341217041016, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.8191, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 22.895992279052734, + "learning_rate": 1e-05, + "loss": 2.0596, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 30.55072593688965, + "learning_rate": 9.948979591836737e-06, + "loss": 1.8904, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 26.542705535888672, + "learning_rate": 9.89795918367347e-06, + "loss": 1.9683, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 39.81034851074219, + "learning_rate": 9.846938775510205e-06, + "loss": 1.9726, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 22.0065860748291, + "learning_rate": 9.795918367346939e-06, + "loss": 2.0575, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 19.012041091918945, + "learning_rate": 9.744897959183674e-06, + "loss": 1.8431, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 39.699974060058594, + "learning_rate": 9.693877551020408e-06, + "loss": 1.8231, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 21.391319274902344, + "learning_rate": 9.642857142857144e-06, + "loss": 1.7939, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 25.8063907623291, + "learning_rate": 9.591836734693878e-06, + "loss": 2.0366, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 20.598569869995117, + "learning_rate": 9.540816326530612e-06, + "loss": 1.8323, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 29.391401290893555, + "learning_rate": 9.489795918367348e-06, + "loss": 2.0052, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 24.39499855041504, + "learning_rate": 9.438775510204082e-06, + "loss": 1.8461, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 24.16887092590332, + "learning_rate": 9.387755102040818e-06, + "loss": 1.9404, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 24.577871322631836, + "learning_rate": 9.336734693877552e-06, + "loss": 1.9202, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 26.117361068725586, + "learning_rate": 9.285714285714288e-06, + "loss": 1.921, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 22.586837768554688, + "learning_rate": 9.234693877551022e-06, + "loss": 1.9692, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 18.438722610473633, + "learning_rate": 9.183673469387756e-06, + "loss": 1.9496, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 22.94545555114746, + "learning_rate": 9.13265306122449e-06, + "loss": 1.991, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 28.664562225341797, + "learning_rate": 9.081632653061225e-06, + "loss": 1.8352, + "step": 220 + }, + { + "epoch": 0.5573770491803278, + "grad_norm": 25.63576316833496, + "learning_rate": 9.03061224489796e-06, + "loss": 1.9399, + "step": 221 + }, + { + "epoch": 0.5598991172761665, + "grad_norm": 21.650251388549805, + "learning_rate": 8.979591836734695e-06, + "loss": 1.9565, + "step": 222 + }, + { + "epoch": 0.562421185372005, + "grad_norm": 29.605735778808594, + "learning_rate": 8.92857142857143e-06, + "loss": 1.729, + "step": 223 + }, + { + "epoch": 0.5649432534678437, + "grad_norm": 23.98230743408203, + "learning_rate": 8.877551020408163e-06, + "loss": 1.9399, + "step": 224 + }, + { + "epoch": 0.5674653215636822, + "grad_norm": 20.37510108947754, + "learning_rate": 8.826530612244899e-06, + "loss": 1.7322, + "step": 225 + }, + { + "epoch": 0.5699873896595208, + "grad_norm": 25.876188278198242, + "learning_rate": 8.775510204081633e-06, + "loss": 1.9444, + "step": 226 + }, + { + "epoch": 0.5725094577553594, + "grad_norm": 32.07249069213867, + "learning_rate": 8.724489795918369e-06, + "loss": 1.9364, + "step": 227 + }, + { + "epoch": 0.575031525851198, + "grad_norm": 28.014524459838867, + "learning_rate": 8.673469387755103e-06, + "loss": 1.757, + "step": 228 + }, + { + "epoch": 0.5775535939470365, + "grad_norm": 30.82647132873535, + "learning_rate": 8.622448979591837e-06, + "loss": 1.9067, + "step": 229 + }, + { + "epoch": 0.5800756620428752, + "grad_norm": 30.651660919189453, + "learning_rate": 8.571428571428571e-06, + "loss": 1.9906, + "step": 230 + }, + { + "epoch": 0.5825977301387137, + "grad_norm": 25.239904403686523, + "learning_rate": 8.520408163265307e-06, + "loss": 1.8914, + "step": 231 + }, + { + "epoch": 0.5851197982345523, + "grad_norm": 21.33747673034668, + "learning_rate": 8.469387755102042e-06, + "loss": 1.9999, + "step": 232 + }, + { + "epoch": 0.587641866330391, + "grad_norm": 25.255064010620117, + "learning_rate": 8.418367346938776e-06, + "loss": 1.8941, + "step": 233 + }, + { + "epoch": 0.5901639344262295, + "grad_norm": 24.443973541259766, + "learning_rate": 8.36734693877551e-06, + "loss": 1.7679, + "step": 234 + }, + { + "epoch": 0.592686002522068, + "grad_norm": 25.473894119262695, + "learning_rate": 8.316326530612246e-06, + "loss": 1.7876, + "step": 235 + }, + { + "epoch": 0.5952080706179067, + "grad_norm": 26.28467559814453, + "learning_rate": 8.26530612244898e-06, + "loss": 1.6761, + "step": 236 + }, + { + "epoch": 0.5977301387137453, + "grad_norm": 24.488052368164062, + "learning_rate": 8.214285714285714e-06, + "loss": 2.0022, + "step": 237 + }, + { + "epoch": 0.6002522068095839, + "grad_norm": 30.074064254760742, + "learning_rate": 8.16326530612245e-06, + "loss": 1.7747, + "step": 238 + }, + { + "epoch": 0.6027742749054225, + "grad_norm": 23.73440170288086, + "learning_rate": 8.112244897959184e-06, + "loss": 1.8468, + "step": 239 + }, + { + "epoch": 0.605296343001261, + "grad_norm": 22.338869094848633, + "learning_rate": 8.06122448979592e-06, + "loss": 1.8611, + "step": 240 + }, + { + "epoch": 0.6078184110970997, + "grad_norm": 24.844266891479492, + "learning_rate": 8.010204081632654e-06, + "loss": 1.9285, + "step": 241 + }, + { + "epoch": 0.6103404791929382, + "grad_norm": 29.65668487548828, + "learning_rate": 7.959183673469388e-06, + "loss": 1.935, + "step": 242 + }, + { + "epoch": 0.6128625472887768, + "grad_norm": 26.01723289489746, + "learning_rate": 7.908163265306124e-06, + "loss": 1.7587, + "step": 243 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 27.04817771911621, + "learning_rate": 7.857142857142858e-06, + "loss": 1.8878, + "step": 244 + }, + { + "epoch": 0.617906683480454, + "grad_norm": 36.23786163330078, + "learning_rate": 7.806122448979593e-06, + "loss": 1.992, + "step": 245 + }, + { + "epoch": 0.6204287515762925, + "grad_norm": 19.283294677734375, + "learning_rate": 7.755102040816327e-06, + "loss": 1.8066, + "step": 246 + }, + { + "epoch": 0.6229508196721312, + "grad_norm": 24.24143409729004, + "learning_rate": 7.704081632653061e-06, + "loss": 1.8899, + "step": 247 + }, + { + "epoch": 0.6254728877679697, + "grad_norm": 25.59832763671875, + "learning_rate": 7.653061224489796e-06, + "loss": 1.9601, + "step": 248 + }, + { + "epoch": 0.6279949558638083, + "grad_norm": 27.195640563964844, + "learning_rate": 7.602040816326531e-06, + "loss": 1.9561, + "step": 249 + }, + { + "epoch": 0.6305170239596469, + "grad_norm": 27.854570388793945, + "learning_rate": 7.551020408163265e-06, + "loss": 1.8781, + "step": 250 + }, + { + "epoch": 0.6330390920554855, + "grad_norm": 25.715761184692383, + "learning_rate": 7.500000000000001e-06, + "loss": 1.8542, + "step": 251 + }, + { + "epoch": 0.6355611601513241, + "grad_norm": 22.562984466552734, + "learning_rate": 7.448979591836736e-06, + "loss": 1.7681, + "step": 252 + }, + { + "epoch": 0.6380832282471627, + "grad_norm": 20.540348052978516, + "learning_rate": 7.39795918367347e-06, + "loss": 1.9617, + "step": 253 + }, + { + "epoch": 0.6406052963430012, + "grad_norm": 24.610937118530273, + "learning_rate": 7.346938775510205e-06, + "loss": 1.9694, + "step": 254 + }, + { + "epoch": 0.6431273644388399, + "grad_norm": 27.93538475036621, + "learning_rate": 7.295918367346939e-06, + "loss": 1.8858, + "step": 255 + }, + { + "epoch": 0.6456494325346784, + "grad_norm": 31.466445922851562, + "learning_rate": 7.244897959183675e-06, + "loss": 1.8252, + "step": 256 + }, + { + "epoch": 0.648171500630517, + "grad_norm": 26.276226043701172, + "learning_rate": 7.193877551020409e-06, + "loss": 1.8865, + "step": 257 + }, + { + "epoch": 0.6506935687263556, + "grad_norm": 22.52095603942871, + "learning_rate": 7.1428571428571436e-06, + "loss": 1.7649, + "step": 258 + }, + { + "epoch": 0.6532156368221942, + "grad_norm": 20.15144157409668, + "learning_rate": 7.091836734693878e-06, + "loss": 2.0158, + "step": 259 + }, + { + "epoch": 0.6557377049180327, + "grad_norm": 26.405349731445312, + "learning_rate": 7.0408163265306125e-06, + "loss": 1.8932, + "step": 260 + }, + { + "epoch": 0.6582597730138714, + "grad_norm": 32.94384765625, + "learning_rate": 6.989795918367348e-06, + "loss": 1.7795, + "step": 261 + }, + { + "epoch": 0.6607818411097099, + "grad_norm": 23.109092712402344, + "learning_rate": 6.938775510204082e-06, + "loss": 1.8383, + "step": 262 + }, + { + "epoch": 0.6633039092055486, + "grad_norm": 21.75737190246582, + "learning_rate": 6.887755102040817e-06, + "loss": 1.8727, + "step": 263 + }, + { + "epoch": 0.6658259773013872, + "grad_norm": 22.96916389465332, + "learning_rate": 6.836734693877551e-06, + "loss": 1.8544, + "step": 264 + }, + { + "epoch": 0.6683480453972257, + "grad_norm": 25.62445640563965, + "learning_rate": 6.785714285714287e-06, + "loss": 1.7503, + "step": 265 + }, + { + "epoch": 0.6708701134930644, + "grad_norm": 25.430530548095703, + "learning_rate": 6.734693877551021e-06, + "loss": 1.7938, + "step": 266 + }, + { + "epoch": 0.6733921815889029, + "grad_norm": 26.462881088256836, + "learning_rate": 6.683673469387756e-06, + "loss": 1.8284, + "step": 267 + }, + { + "epoch": 0.6759142496847415, + "grad_norm": 31.45004653930664, + "learning_rate": 6.63265306122449e-06, + "loss": 2.0328, + "step": 268 + }, + { + "epoch": 0.6784363177805801, + "grad_norm": 30.525737762451172, + "learning_rate": 6.581632653061225e-06, + "loss": 1.8192, + "step": 269 + }, + { + "epoch": 0.6809583858764187, + "grad_norm": 25.705707550048828, + "learning_rate": 6.530612244897959e-06, + "loss": 1.8533, + "step": 270 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3940547519397888.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-270/training_args.bin b/checkpoints/checkpoint-270/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..866f8d7e39db21daf30a11de7c14a3ccb8f2e9aa --- /dev/null +++ b/checkpoints/checkpoint-270/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:591380c2089627b1aa728bfd52abdb50afeaafc7a7f48353dede4e443fc370d0 +size 5969 diff --git a/checkpoints/checkpoint-280/README.md b/checkpoints/checkpoint-280/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b974a935220f493e52a3da346206a652479b4735 --- /dev/null +++ b/checkpoints/checkpoint-280/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.2-1B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-280/adapter_config.json b/checkpoints/checkpoint-280/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a0d31afa2216e2d6d2237fc15eef0c8c03ca674 --- /dev/null +++ b/checkpoints/checkpoint-280/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.2-1B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-280/adapter_model.safetensors b/checkpoints/checkpoint-280/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..408ded1ea8ded2e39133adb325873b9c6d7a9065 --- /dev/null +++ b/checkpoints/checkpoint-280/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a0d1b02b4b6c3497b0db603d8a6826e9b8cb68896cfc36078faf89746c6149f +size 41248 diff --git a/checkpoints/checkpoint-280/chat_template.jinja b/checkpoints/checkpoint-280/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..1bad6a0f648dccdbec523ca79ba90fbcfc806af0 --- /dev/null +++ b/checkpoints/checkpoint-280/chat_template.jinja @@ -0,0 +1,93 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- if strftime_now is defined %} + {%- set date_string = strftime_now("%d %b %Y") %} + {%- else %} + {%- set date_string = "26 Jul 2024" %} + {%- endif %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {{- "<|eot_id|>" }} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-280/optimizer.pt b/checkpoints/checkpoint-280/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..d2bcb27e1a9430b1481cf6ef78a186a2012b9f9f --- /dev/null +++ b/checkpoints/checkpoint-280/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12319992f07ab12ef719a1c9b7e36024a18668a1cb5a4c743660956f183bc2b9 +size 38953 diff --git a/checkpoints/checkpoint-280/rng_state.pth b/checkpoints/checkpoint-280/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..27efe79200462dc7ec248017016a4968efc39ca1 --- /dev/null +++ b/checkpoints/checkpoint-280/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4684d51fc75f779aa7923c633e4c75a716ac73280dc8e9335cb1439e364bde19 +size 14581 diff --git a/checkpoints/checkpoint-280/scheduler.pt b/checkpoints/checkpoint-280/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..06dfbb522b23a6650608e1d782ac9ece3a792731 --- /dev/null +++ b/checkpoints/checkpoint-280/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d90c54f1c19f6ed09c6387c830af06f276122506bd071cad17b0d7098802f8a +size 1465 diff --git a/checkpoints/checkpoint-280/special_tokens_map.json b/checkpoints/checkpoint-280/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-280/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-280/tokenizer.json b/checkpoints/checkpoint-280/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-280/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-280/tokenizer_config.json b/checkpoints/checkpoint-280/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-280/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-280/trainer_state.json b/checkpoints/checkpoint-280/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6606ded732cc9758b9769b86fb2745d2074e1dbe --- /dev/null +++ b/checkpoints/checkpoint-280/trainer_state.json @@ -0,0 +1,2010 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.7061790668348046, + "eval_steps": 100, + "global_step": 280, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 25.350475311279297, + "learning_rate": 0.0, + "loss": 2.5568, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 24.538068771362305, + "learning_rate": 4.000000000000001e-06, + "loss": 2.7748, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 23.780784606933594, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5911, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 24.780380249023438, + "learning_rate": 1.2e-05, + "loss": 2.8427, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 22.699949264526367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.709, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 22.106008529663086, + "learning_rate": 2e-05, + "loss": 2.5854, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 22.497045516967773, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.5427, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 27.103275299072266, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.6599, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 21.081985473632812, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.5145, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 25.964981079101562, + "learning_rate": 1.979591836734694e-05, + "loss": 2.4247, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 25.353195190429688, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.5092, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 18.94191551208496, + "learning_rate": 1.969387755102041e-05, + "loss": 2.4335, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 23.60140037536621, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.544, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 24.298965454101562, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.4987, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.745506286621094, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.4985, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 22.54330062866211, + "learning_rate": 1.948979591836735e-05, + "loss": 2.6892, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 21.46229362487793, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.3998, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 20.54530143737793, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.4244, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 18.8839111328125, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.3911, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.924652099609375, + "learning_rate": 1.928571428571429e-05, + "loss": 2.3588, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.996627807617188, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.3727, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 18.584613800048828, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2974, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 14.309200286865234, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.4843, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.074164390563965, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.4043, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 13.610542297363281, + "learning_rate": 1.903061224489796e-05, + "loss": 2.3762, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 15.666613578796387, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.3249, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 14.475164413452148, + "learning_rate": 1.892857142857143e-05, + "loss": 2.3317, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 16.231687545776367, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.5064, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 16.8968563079834, + "learning_rate": 1.88265306122449e-05, + "loss": 2.3932, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 17.74305534362793, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.3329, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 16.41620445251465, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.3982, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 17.965959548950195, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.4227, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 19.92589569091797, + "learning_rate": 1.862244897959184e-05, + "loss": 2.5255, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 20.62932586669922, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.1816, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 18.360614776611328, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.2827, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 19.199546813964844, + "learning_rate": 1.8469387755102043e-05, + "loss": 2.1498, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 22.727521896362305, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.3811, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 19.80649757385254, + "learning_rate": 1.836734693877551e-05, + "loss": 2.2342, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.24563217163086, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.2287, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 25.384042739868164, + "learning_rate": 1.826530612244898e-05, + "loss": 2.1259, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 23.417089462280273, + "learning_rate": 1.8214285714285715e-05, + "loss": 2.0858, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 27.639497756958008, + "learning_rate": 1.816326530612245e-05, + "loss": 2.2243, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 27.390850067138672, + "learning_rate": 1.8112244897959187e-05, + "loss": 2.1314, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 27.956937789916992, + "learning_rate": 1.806122448979592e-05, + "loss": 2.1755, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 32.09632873535156, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.2365, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 33.84647750854492, + "learning_rate": 1.795918367346939e-05, + "loss": 2.1671, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 32.027130126953125, + "learning_rate": 1.7908163265306123e-05, + "loss": 2.09, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 35.423587799072266, + "learning_rate": 1.785714285714286e-05, + "loss": 2.2479, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 31.041240692138672, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9687, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 28.790103912353516, + "learning_rate": 1.7755102040816327e-05, + "loss": 2.1428, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.089313507080078, + "learning_rate": 1.7704081632653062e-05, + "loss": 2.0673, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 26.493867874145508, + "learning_rate": 1.7653061224489798e-05, + "loss": 2.0814, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 19.993173599243164, + "learning_rate": 1.760204081632653e-05, + "loss": 2.005, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 21.89765167236328, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.2262, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 23.22844123840332, + "learning_rate": 1.7500000000000002e-05, + "loss": 2.0208, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 15.864526748657227, + "learning_rate": 1.7448979591836738e-05, + "loss": 2.0153, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 21.451187133789062, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.1386, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 18.089811325073242, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.9517, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 24.029157638549805, + "learning_rate": 1.729591836734694e-05, + "loss": 1.9719, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 18.722776412963867, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0623, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 20.211933135986328, + "learning_rate": 1.719387755102041e-05, + "loss": 2.0081, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 17.61188507080078, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.8484, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 20.118955612182617, + "learning_rate": 1.7091836734693878e-05, + "loss": 2.0799, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 17.271841049194336, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.9832, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 19.521392822265625, + "learning_rate": 1.698979591836735e-05, + "loss": 1.9129, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 22.660900115966797, + "learning_rate": 1.6938775510204085e-05, + "loss": 2.118, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 17.332427978515625, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.9632, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 22.42765998840332, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.954, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 23.6208553314209, + "learning_rate": 1.678571428571429e-05, + "loss": 1.9917, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 19.78505516052246, + "learning_rate": 1.673469387755102e-05, + "loss": 1.7964, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 19.453041076660156, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.9587, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 24.731407165527344, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.9945, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 20.977611541748047, + "learning_rate": 1.6581632653061225e-05, + "loss": 2.0617, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 22.959585189819336, + "learning_rate": 1.653061224489796e-05, + "loss": 1.98, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 21.952653884887695, + "learning_rate": 1.6479591836734696e-05, + "loss": 2.1094, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 22.320383071899414, + "learning_rate": 1.642857142857143e-05, + "loss": 1.8418, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 24.375411987304688, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.8428, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 19.64323616027832, + "learning_rate": 1.63265306122449e-05, + "loss": 1.9194, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 22.459064483642578, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.6649, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 36.789764404296875, + "learning_rate": 1.6224489795918368e-05, + "loss": 2.0131, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 22.109119415283203, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.9603, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 19.196834564208984, + "learning_rate": 1.612244897959184e-05, + "loss": 2.0538, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 26.870800018310547, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.9168, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 35.190696716308594, + "learning_rate": 1.6020408163265308e-05, + "loss": 2.0149, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 19.963472366333008, + "learning_rate": 1.596938775510204e-05, + "loss": 1.7871, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 20.292407989501953, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.944, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 20.55329132080078, + "learning_rate": 1.586734693877551e-05, + "loss": 2.0175, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 17.27350616455078, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.9308, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 22.471134185791016, + "learning_rate": 1.576530612244898e-05, + "loss": 1.9221, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 25.098316192626953, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.9359, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 25.125213623046875, + "learning_rate": 1.566326530612245e-05, + "loss": 2.0087, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 20.038599014282227, + "learning_rate": 1.5612244897959187e-05, + "loss": 2.0939, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 19.016841888427734, + "learning_rate": 1.556122448979592e-05, + "loss": 2.0183, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 21.97820472717285, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.8239, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 25.578901290893555, + "learning_rate": 1.545918367346939e-05, + "loss": 1.9388, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 23.74614143371582, + "learning_rate": 1.5408163265306123e-05, + "loss": 2.0492, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 22.203304290771484, + "learning_rate": 1.535714285714286e-05, + "loss": 1.941, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 21.39324188232422, + "learning_rate": 1.530612244897959e-05, + "loss": 1.9042, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 18.99315643310547, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.88, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 24.22341537475586, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.8147, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.8966256380081177, + "eval_runtime": 6.9787, + "eval_samples_per_second": 101.022, + "eval_steps_per_second": 50.583, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 18.296152114868164, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.8605, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 26.404766082763672, + "learning_rate": 1.510204081632653e-05, + "loss": 2.1195, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 19.187122344970703, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.9284, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 20.79934310913086, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.725, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 23.833288192749023, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.9215, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 22.301727294921875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.8728, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 23.685596466064453, + "learning_rate": 1.4846938775510204e-05, + "loss": 2.0482, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 18.969186782836914, + "learning_rate": 1.479591836734694e-05, + "loss": 1.8724, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 23.994483947753906, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.9542, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 16.84621238708496, + "learning_rate": 1.469387755102041e-05, + "loss": 1.9703, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 23.411087036132812, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.9836, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 29.55487632751465, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.9124, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 32.28921127319336, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.8566, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 24.007558822631836, + "learning_rate": 1.448979591836735e-05, + "loss": 1.8296, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 26.753524780273438, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.7181, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 22.49270248413086, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.8741, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 28.006656646728516, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.9151, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 17.606775283813477, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.9654, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 29.94802474975586, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.8849, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 28.27743148803711, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.8006, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 19.11652183532715, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.8539, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 24.255807876586914, + "learning_rate": 1.4081632653061225e-05, + "loss": 2.0162, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 22.508352279663086, + "learning_rate": 1.403061224489796e-05, + "loss": 1.858, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 27.028772354125977, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.8175, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 22.697704315185547, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.9789, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 31.604068756103516, + "learning_rate": 1.3877551020408165e-05, + "loss": 2.0039, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 27.71053695678711, + "learning_rate": 1.38265306122449e-05, + "loss": 1.9867, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 17.37586784362793, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.6931, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 20.28536605834961, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.9199, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 29.4377384185791, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.9322, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 17.703046798706055, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.8842, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 30.14008140563965, + "learning_rate": 1.3571428571428574e-05, + "loss": 2.1228, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 29.657262802124023, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.8929, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 18.243854522705078, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.8811, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 33.22247314453125, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.9814, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 26.413856506347656, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.9329, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 20.56089210510254, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.8944, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 19.480737686157227, + "learning_rate": 1.326530612244898e-05, + "loss": 1.9221, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 22.788074493408203, + "learning_rate": 1.3214285714285716e-05, + "loss": 2.0445, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 20.722291946411133, + "learning_rate": 1.316326530612245e-05, + "loss": 1.9998, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 25.190189361572266, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.8076, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 23.203886032104492, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.7821, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 25.32374382019043, + "learning_rate": 1.3010204081632653e-05, + "loss": 2.0356, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 28.798864364624023, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.8202, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 24.93810272216797, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.9237, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 36.78353500366211, + "learning_rate": 1.2857142857142859e-05, + "loss": 2.0019, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 28.510663986206055, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.9268, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 38.19087219238281, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.9366, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 20.796728134155273, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.8731, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 23.036758422851562, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.8835, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 27.058195114135742, + "learning_rate": 1.260204081632653e-05, + "loss": 1.8013, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 25.390460968017578, + "learning_rate": 1.2551020408163267e-05, + "loss": 2.0623, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 27.993654251098633, + "learning_rate": 1.25e-05, + "loss": 1.6895, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 24.15807342529297, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.9799, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 24.369815826416016, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.9687, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 24.572988510131836, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.8607, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 20.491390228271484, + "learning_rate": 1.229591836734694e-05, + "loss": 2.0677, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 25.128101348876953, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.9233, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 18.843276977539062, + "learning_rate": 1.219387755102041e-05, + "loss": 1.781, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 24.99994659423828, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.962, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 20.679218292236328, + "learning_rate": 1.2091836734693878e-05, + "loss": 2.0055, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 26.00550651550293, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.9761, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 33.80900192260742, + "learning_rate": 1.1989795918367348e-05, + "loss": 2.0502, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 25.639009475708008, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.9088, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 17.48627471923828, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.9359, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 23.16074562072754, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.8647, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 25.39946174621582, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.8523, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 25.8050537109375, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.8403, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 20.019033432006836, + "learning_rate": 1.1683673469387755e-05, + "loss": 2.0023, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 26.194847106933594, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.9429, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 21.064212799072266, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.8302, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 21.876129150390625, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.8881, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 33.61103439331055, + "learning_rate": 1.1479591836734697e-05, + "loss": 2.0497, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 27.204744338989258, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.8431, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 21.605751037597656, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.9149, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 30.307472229003906, + "learning_rate": 1.1326530612244899e-05, + "loss": 2.0313, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 23.69244384765625, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.8676, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 25.619901657104492, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.7905, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 28.0296573638916, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.8567, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 36.4359130859375, + "learning_rate": 1.1122448979591838e-05, + "loss": 2.115, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 26.91726303100586, + "learning_rate": 1.1071428571428572e-05, + "loss": 2.0642, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 23.085880279541016, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.9109, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 27.870641708374023, + "learning_rate": 1.096938775510204e-05, + "loss": 1.8999, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 32.0672607421875, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.904, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 28.879365921020508, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.7159, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 27.592771530151367, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.8561, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 27.412763595581055, + "learning_rate": 1.076530612244898e-05, + "loss": 1.9282, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 30.12356185913086, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.8726, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 39.9027214050293, + "learning_rate": 1.066326530612245e-05, + "loss": 1.7551, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 30.483945846557617, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.7643, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 26.00415802001953, + "learning_rate": 1.0561224489795918e-05, + "loss": 2.0552, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 23.03282356262207, + "learning_rate": 1.0510204081632654e-05, + "loss": 2.0052, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 33.653221130371094, + "learning_rate": 1.045918367346939e-05, + "loss": 1.7367, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 39.59351348876953, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.9726, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.77714920043945, + "learning_rate": 1.0357142857142859e-05, + "loss": 2.0906, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 33.194549560546875, + "learning_rate": 1.0306122448979591e-05, + "loss": 2.0742, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 25.10793685913086, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.9204, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 40.048404693603516, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.7775, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 26.085933685302734, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.9459, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 18.375, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.9536, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.8626214265823364, + "eval_runtime": 6.6508, + "eval_samples_per_second": 106.002, + "eval_steps_per_second": 53.076, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 33.858341217041016, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.8191, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 22.895992279052734, + "learning_rate": 1e-05, + "loss": 2.0596, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 30.55072593688965, + "learning_rate": 9.948979591836737e-06, + "loss": 1.8904, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 26.542705535888672, + "learning_rate": 9.89795918367347e-06, + "loss": 1.9683, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 39.81034851074219, + "learning_rate": 9.846938775510205e-06, + "loss": 1.9726, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 22.0065860748291, + "learning_rate": 9.795918367346939e-06, + "loss": 2.0575, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 19.012041091918945, + "learning_rate": 9.744897959183674e-06, + "loss": 1.8431, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 39.699974060058594, + "learning_rate": 9.693877551020408e-06, + "loss": 1.8231, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 21.391319274902344, + "learning_rate": 9.642857142857144e-06, + "loss": 1.7939, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 25.8063907623291, + "learning_rate": 9.591836734693878e-06, + "loss": 2.0366, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 20.598569869995117, + "learning_rate": 9.540816326530612e-06, + "loss": 1.8323, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 29.391401290893555, + "learning_rate": 9.489795918367348e-06, + "loss": 2.0052, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 24.39499855041504, + "learning_rate": 9.438775510204082e-06, + "loss": 1.8461, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 24.16887092590332, + "learning_rate": 9.387755102040818e-06, + "loss": 1.9404, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 24.577871322631836, + "learning_rate": 9.336734693877552e-06, + "loss": 1.9202, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 26.117361068725586, + "learning_rate": 9.285714285714288e-06, + "loss": 1.921, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 22.586837768554688, + "learning_rate": 9.234693877551022e-06, + "loss": 1.9692, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 18.438722610473633, + "learning_rate": 9.183673469387756e-06, + "loss": 1.9496, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 22.94545555114746, + "learning_rate": 9.13265306122449e-06, + "loss": 1.991, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 28.664562225341797, + "learning_rate": 9.081632653061225e-06, + "loss": 1.8352, + "step": 220 + }, + { + "epoch": 0.5573770491803278, + "grad_norm": 25.63576316833496, + "learning_rate": 9.03061224489796e-06, + "loss": 1.9399, + "step": 221 + }, + { + "epoch": 0.5598991172761665, + "grad_norm": 21.650251388549805, + "learning_rate": 8.979591836734695e-06, + "loss": 1.9565, + "step": 222 + }, + { + "epoch": 0.562421185372005, + "grad_norm": 29.605735778808594, + "learning_rate": 8.92857142857143e-06, + "loss": 1.729, + "step": 223 + }, + { + "epoch": 0.5649432534678437, + "grad_norm": 23.98230743408203, + "learning_rate": 8.877551020408163e-06, + "loss": 1.9399, + "step": 224 + }, + { + "epoch": 0.5674653215636822, + "grad_norm": 20.37510108947754, + "learning_rate": 8.826530612244899e-06, + "loss": 1.7322, + "step": 225 + }, + { + "epoch": 0.5699873896595208, + "grad_norm": 25.876188278198242, + "learning_rate": 8.775510204081633e-06, + "loss": 1.9444, + "step": 226 + }, + { + "epoch": 0.5725094577553594, + "grad_norm": 32.07249069213867, + "learning_rate": 8.724489795918369e-06, + "loss": 1.9364, + "step": 227 + }, + { + "epoch": 0.575031525851198, + "grad_norm": 28.014524459838867, + "learning_rate": 8.673469387755103e-06, + "loss": 1.757, + "step": 228 + }, + { + "epoch": 0.5775535939470365, + "grad_norm": 30.82647132873535, + "learning_rate": 8.622448979591837e-06, + "loss": 1.9067, + "step": 229 + }, + { + "epoch": 0.5800756620428752, + "grad_norm": 30.651660919189453, + "learning_rate": 8.571428571428571e-06, + "loss": 1.9906, + "step": 230 + }, + { + "epoch": 0.5825977301387137, + "grad_norm": 25.239904403686523, + "learning_rate": 8.520408163265307e-06, + "loss": 1.8914, + "step": 231 + }, + { + "epoch": 0.5851197982345523, + "grad_norm": 21.33747673034668, + "learning_rate": 8.469387755102042e-06, + "loss": 1.9999, + "step": 232 + }, + { + "epoch": 0.587641866330391, + "grad_norm": 25.255064010620117, + "learning_rate": 8.418367346938776e-06, + "loss": 1.8941, + "step": 233 + }, + { + "epoch": 0.5901639344262295, + "grad_norm": 24.443973541259766, + "learning_rate": 8.36734693877551e-06, + "loss": 1.7679, + "step": 234 + }, + { + "epoch": 0.592686002522068, + "grad_norm": 25.473894119262695, + "learning_rate": 8.316326530612246e-06, + "loss": 1.7876, + "step": 235 + }, + { + "epoch": 0.5952080706179067, + "grad_norm": 26.28467559814453, + "learning_rate": 8.26530612244898e-06, + "loss": 1.6761, + "step": 236 + }, + { + "epoch": 0.5977301387137453, + "grad_norm": 24.488052368164062, + "learning_rate": 8.214285714285714e-06, + "loss": 2.0022, + "step": 237 + }, + { + "epoch": 0.6002522068095839, + "grad_norm": 30.074064254760742, + "learning_rate": 8.16326530612245e-06, + "loss": 1.7747, + "step": 238 + }, + { + "epoch": 0.6027742749054225, + "grad_norm": 23.73440170288086, + "learning_rate": 8.112244897959184e-06, + "loss": 1.8468, + "step": 239 + }, + { + "epoch": 0.605296343001261, + "grad_norm": 22.338869094848633, + "learning_rate": 8.06122448979592e-06, + "loss": 1.8611, + "step": 240 + }, + { + "epoch": 0.6078184110970997, + "grad_norm": 24.844266891479492, + "learning_rate": 8.010204081632654e-06, + "loss": 1.9285, + "step": 241 + }, + { + "epoch": 0.6103404791929382, + "grad_norm": 29.65668487548828, + "learning_rate": 7.959183673469388e-06, + "loss": 1.935, + "step": 242 + }, + { + "epoch": 0.6128625472887768, + "grad_norm": 26.01723289489746, + "learning_rate": 7.908163265306124e-06, + "loss": 1.7587, + "step": 243 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 27.04817771911621, + "learning_rate": 7.857142857142858e-06, + "loss": 1.8878, + "step": 244 + }, + { + "epoch": 0.617906683480454, + "grad_norm": 36.23786163330078, + "learning_rate": 7.806122448979593e-06, + "loss": 1.992, + "step": 245 + }, + { + "epoch": 0.6204287515762925, + "grad_norm": 19.283294677734375, + "learning_rate": 7.755102040816327e-06, + "loss": 1.8066, + "step": 246 + }, + { + "epoch": 0.6229508196721312, + "grad_norm": 24.24143409729004, + "learning_rate": 7.704081632653061e-06, + "loss": 1.8899, + "step": 247 + }, + { + "epoch": 0.6254728877679697, + "grad_norm": 25.59832763671875, + "learning_rate": 7.653061224489796e-06, + "loss": 1.9601, + "step": 248 + }, + { + "epoch": 0.6279949558638083, + "grad_norm": 27.195640563964844, + "learning_rate": 7.602040816326531e-06, + "loss": 1.9561, + "step": 249 + }, + { + "epoch": 0.6305170239596469, + "grad_norm": 27.854570388793945, + "learning_rate": 7.551020408163265e-06, + "loss": 1.8781, + "step": 250 + }, + { + "epoch": 0.6330390920554855, + "grad_norm": 25.715761184692383, + "learning_rate": 7.500000000000001e-06, + "loss": 1.8542, + "step": 251 + }, + { + "epoch": 0.6355611601513241, + "grad_norm": 22.562984466552734, + "learning_rate": 7.448979591836736e-06, + "loss": 1.7681, + "step": 252 + }, + { + "epoch": 0.6380832282471627, + "grad_norm": 20.540348052978516, + "learning_rate": 7.39795918367347e-06, + "loss": 1.9617, + "step": 253 + }, + { + "epoch": 0.6406052963430012, + "grad_norm": 24.610937118530273, + "learning_rate": 7.346938775510205e-06, + "loss": 1.9694, + "step": 254 + }, + { + "epoch": 0.6431273644388399, + "grad_norm": 27.93538475036621, + "learning_rate": 7.295918367346939e-06, + "loss": 1.8858, + "step": 255 + }, + { + "epoch": 0.6456494325346784, + "grad_norm": 31.466445922851562, + "learning_rate": 7.244897959183675e-06, + "loss": 1.8252, + "step": 256 + }, + { + "epoch": 0.648171500630517, + "grad_norm": 26.276226043701172, + "learning_rate": 7.193877551020409e-06, + "loss": 1.8865, + "step": 257 + }, + { + "epoch": 0.6506935687263556, + "grad_norm": 22.52095603942871, + "learning_rate": 7.1428571428571436e-06, + "loss": 1.7649, + "step": 258 + }, + { + "epoch": 0.6532156368221942, + "grad_norm": 20.15144157409668, + "learning_rate": 7.091836734693878e-06, + "loss": 2.0158, + "step": 259 + }, + { + "epoch": 0.6557377049180327, + "grad_norm": 26.405349731445312, + "learning_rate": 7.0408163265306125e-06, + "loss": 1.8932, + "step": 260 + }, + { + "epoch": 0.6582597730138714, + "grad_norm": 32.94384765625, + "learning_rate": 6.989795918367348e-06, + "loss": 1.7795, + "step": 261 + }, + { + "epoch": 0.6607818411097099, + "grad_norm": 23.109092712402344, + "learning_rate": 6.938775510204082e-06, + "loss": 1.8383, + "step": 262 + }, + { + "epoch": 0.6633039092055486, + "grad_norm": 21.75737190246582, + "learning_rate": 6.887755102040817e-06, + "loss": 1.8727, + "step": 263 + }, + { + "epoch": 0.6658259773013872, + "grad_norm": 22.96916389465332, + "learning_rate": 6.836734693877551e-06, + "loss": 1.8544, + "step": 264 + }, + { + "epoch": 0.6683480453972257, + "grad_norm": 25.62445640563965, + "learning_rate": 6.785714285714287e-06, + "loss": 1.7503, + "step": 265 + }, + { + "epoch": 0.6708701134930644, + "grad_norm": 25.430530548095703, + "learning_rate": 6.734693877551021e-06, + "loss": 1.7938, + "step": 266 + }, + { + "epoch": 0.6733921815889029, + "grad_norm": 26.462881088256836, + "learning_rate": 6.683673469387756e-06, + "loss": 1.8284, + "step": 267 + }, + { + "epoch": 0.6759142496847415, + "grad_norm": 31.45004653930664, + "learning_rate": 6.63265306122449e-06, + "loss": 2.0328, + "step": 268 + }, + { + "epoch": 0.6784363177805801, + "grad_norm": 30.525737762451172, + "learning_rate": 6.581632653061225e-06, + "loss": 1.8192, + "step": 269 + }, + { + "epoch": 0.6809583858764187, + "grad_norm": 25.705707550048828, + "learning_rate": 6.530612244897959e-06, + "loss": 1.8533, + "step": 270 + }, + { + "epoch": 0.6834804539722572, + "grad_norm": 39.90187454223633, + "learning_rate": 6.4795918367346946e-06, + "loss": 1.9483, + "step": 271 + }, + { + "epoch": 0.6860025220680959, + "grad_norm": 28.0180721282959, + "learning_rate": 6.4285714285714295e-06, + "loss": 1.8132, + "step": 272 + }, + { + "epoch": 0.6885245901639344, + "grad_norm": 34.821372985839844, + "learning_rate": 6.3775510204081635e-06, + "loss": 1.9599, + "step": 273 + }, + { + "epoch": 0.691046658259773, + "grad_norm": 24.018394470214844, + "learning_rate": 6.326530612244899e-06, + "loss": 1.9248, + "step": 274 + }, + { + "epoch": 0.6935687263556116, + "grad_norm": 24.074344635009766, + "learning_rate": 6.275510204081633e-06, + "loss": 2.0148, + "step": 275 + }, + { + "epoch": 0.6960907944514502, + "grad_norm": 31.1939754486084, + "learning_rate": 6.224489795918368e-06, + "loss": 1.8959, + "step": 276 + }, + { + "epoch": 0.6986128625472888, + "grad_norm": 25.481502532958984, + "learning_rate": 6.173469387755102e-06, + "loss": 1.9832, + "step": 277 + }, + { + "epoch": 0.7011349306431274, + "grad_norm": 29.6664981842041, + "learning_rate": 6.122448979591837e-06, + "loss": 1.9222, + "step": 278 + }, + { + "epoch": 0.7036569987389659, + "grad_norm": 26.30698585510254, + "learning_rate": 6.071428571428571e-06, + "loss": 1.9897, + "step": 279 + }, + { + "epoch": 0.7061790668348046, + "grad_norm": 31.827558517456055, + "learning_rate": 6.020408163265307e-06, + "loss": 1.8615, + "step": 280 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4086918019989504.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-280/training_args.bin b/checkpoints/checkpoint-280/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..866f8d7e39db21daf30a11de7c14a3ccb8f2e9aa --- /dev/null +++ b/checkpoints/checkpoint-280/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:591380c2089627b1aa728bfd52abdb50afeaafc7a7f48353dede4e443fc370d0 +size 5969 diff --git a/checkpoints/checkpoint-290/README.md b/checkpoints/checkpoint-290/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b974a935220f493e52a3da346206a652479b4735 --- /dev/null +++ b/checkpoints/checkpoint-290/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.2-1B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-290/adapter_config.json b/checkpoints/checkpoint-290/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a0d31afa2216e2d6d2237fc15eef0c8c03ca674 --- /dev/null +++ b/checkpoints/checkpoint-290/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.2-1B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-290/adapter_model.safetensors b/checkpoints/checkpoint-290/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cb549bed13d2641226514292aa7788ee16227e5d --- /dev/null +++ b/checkpoints/checkpoint-290/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0218fb3abbc392e630035060ae278104db91a2e0c943a53ddb5eacb85a5ed948 +size 41248 diff --git a/checkpoints/checkpoint-290/chat_template.jinja b/checkpoints/checkpoint-290/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..1bad6a0f648dccdbec523ca79ba90fbcfc806af0 --- /dev/null +++ b/checkpoints/checkpoint-290/chat_template.jinja @@ -0,0 +1,93 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- if strftime_now is defined %} + {%- set date_string = strftime_now("%d %b %Y") %} + {%- else %} + {%- set date_string = "26 Jul 2024" %} + {%- endif %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {{- "<|eot_id|>" }} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-290/optimizer.pt b/checkpoints/checkpoint-290/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..432a28692e3a347d08c80899433f1a2b73463a3f --- /dev/null +++ b/checkpoints/checkpoint-290/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14e26effc42e350306bbe199ab0eded105250a9884d99e5823fccf270ca96067 +size 38953 diff --git a/checkpoints/checkpoint-290/rng_state.pth b/checkpoints/checkpoint-290/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..27efe79200462dc7ec248017016a4968efc39ca1 --- /dev/null +++ b/checkpoints/checkpoint-290/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4684d51fc75f779aa7923c633e4c75a716ac73280dc8e9335cb1439e364bde19 +size 14581 diff --git a/checkpoints/checkpoint-290/scheduler.pt b/checkpoints/checkpoint-290/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f265a846e0ff405a28bff666e3bf234497d6dd48 --- /dev/null +++ b/checkpoints/checkpoint-290/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a909198e907fc5706177ad04bb3bfa534cb12f46872764cf62e38366c40aed57 +size 1465 diff --git a/checkpoints/checkpoint-290/special_tokens_map.json b/checkpoints/checkpoint-290/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-290/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-290/tokenizer.json b/checkpoints/checkpoint-290/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-290/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-290/tokenizer_config.json b/checkpoints/checkpoint-290/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-290/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-290/trainer_state.json b/checkpoints/checkpoint-290/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3ad23de3fe4ec2189dc83ba4ba828ed98b8a5757 --- /dev/null +++ b/checkpoints/checkpoint-290/trainer_state.json @@ -0,0 +1,2080 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.7313997477931904, + "eval_steps": 100, + "global_step": 290, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 25.350475311279297, + "learning_rate": 0.0, + "loss": 2.5568, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 24.538068771362305, + "learning_rate": 4.000000000000001e-06, + "loss": 2.7748, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 23.780784606933594, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5911, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 24.780380249023438, + "learning_rate": 1.2e-05, + "loss": 2.8427, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 22.699949264526367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.709, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 22.106008529663086, + "learning_rate": 2e-05, + "loss": 2.5854, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 22.497045516967773, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.5427, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 27.103275299072266, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.6599, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 21.081985473632812, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.5145, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 25.964981079101562, + "learning_rate": 1.979591836734694e-05, + "loss": 2.4247, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 25.353195190429688, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.5092, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 18.94191551208496, + "learning_rate": 1.969387755102041e-05, + "loss": 2.4335, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 23.60140037536621, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.544, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 24.298965454101562, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.4987, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.745506286621094, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.4985, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 22.54330062866211, + "learning_rate": 1.948979591836735e-05, + "loss": 2.6892, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 21.46229362487793, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.3998, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 20.54530143737793, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.4244, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 18.8839111328125, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.3911, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.924652099609375, + "learning_rate": 1.928571428571429e-05, + "loss": 2.3588, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.996627807617188, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.3727, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 18.584613800048828, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2974, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 14.309200286865234, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.4843, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.074164390563965, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.4043, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 13.610542297363281, + "learning_rate": 1.903061224489796e-05, + "loss": 2.3762, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 15.666613578796387, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.3249, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 14.475164413452148, + "learning_rate": 1.892857142857143e-05, + "loss": 2.3317, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 16.231687545776367, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.5064, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 16.8968563079834, + "learning_rate": 1.88265306122449e-05, + "loss": 2.3932, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 17.74305534362793, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.3329, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 16.41620445251465, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.3982, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 17.965959548950195, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.4227, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 19.92589569091797, + "learning_rate": 1.862244897959184e-05, + "loss": 2.5255, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 20.62932586669922, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.1816, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 18.360614776611328, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.2827, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 19.199546813964844, + "learning_rate": 1.8469387755102043e-05, + "loss": 2.1498, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 22.727521896362305, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.3811, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 19.80649757385254, + "learning_rate": 1.836734693877551e-05, + "loss": 2.2342, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.24563217163086, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.2287, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 25.384042739868164, + "learning_rate": 1.826530612244898e-05, + "loss": 2.1259, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 23.417089462280273, + "learning_rate": 1.8214285714285715e-05, + "loss": 2.0858, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 27.639497756958008, + "learning_rate": 1.816326530612245e-05, + "loss": 2.2243, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 27.390850067138672, + "learning_rate": 1.8112244897959187e-05, + "loss": 2.1314, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 27.956937789916992, + "learning_rate": 1.806122448979592e-05, + "loss": 2.1755, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 32.09632873535156, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.2365, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 33.84647750854492, + "learning_rate": 1.795918367346939e-05, + "loss": 2.1671, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 32.027130126953125, + "learning_rate": 1.7908163265306123e-05, + "loss": 2.09, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 35.423587799072266, + "learning_rate": 1.785714285714286e-05, + "loss": 2.2479, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 31.041240692138672, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9687, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 28.790103912353516, + "learning_rate": 1.7755102040816327e-05, + "loss": 2.1428, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.089313507080078, + "learning_rate": 1.7704081632653062e-05, + "loss": 2.0673, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 26.493867874145508, + "learning_rate": 1.7653061224489798e-05, + "loss": 2.0814, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 19.993173599243164, + "learning_rate": 1.760204081632653e-05, + "loss": 2.005, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 21.89765167236328, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.2262, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 23.22844123840332, + "learning_rate": 1.7500000000000002e-05, + "loss": 2.0208, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 15.864526748657227, + "learning_rate": 1.7448979591836738e-05, + "loss": 2.0153, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 21.451187133789062, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.1386, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 18.089811325073242, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.9517, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 24.029157638549805, + "learning_rate": 1.729591836734694e-05, + "loss": 1.9719, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 18.722776412963867, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0623, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 20.211933135986328, + "learning_rate": 1.719387755102041e-05, + "loss": 2.0081, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 17.61188507080078, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.8484, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 20.118955612182617, + "learning_rate": 1.7091836734693878e-05, + "loss": 2.0799, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 17.271841049194336, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.9832, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 19.521392822265625, + "learning_rate": 1.698979591836735e-05, + "loss": 1.9129, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 22.660900115966797, + "learning_rate": 1.6938775510204085e-05, + "loss": 2.118, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 17.332427978515625, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.9632, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 22.42765998840332, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.954, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 23.6208553314209, + "learning_rate": 1.678571428571429e-05, + "loss": 1.9917, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 19.78505516052246, + "learning_rate": 1.673469387755102e-05, + "loss": 1.7964, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 19.453041076660156, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.9587, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 24.731407165527344, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.9945, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 20.977611541748047, + "learning_rate": 1.6581632653061225e-05, + "loss": 2.0617, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 22.959585189819336, + "learning_rate": 1.653061224489796e-05, + "loss": 1.98, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 21.952653884887695, + "learning_rate": 1.6479591836734696e-05, + "loss": 2.1094, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 22.320383071899414, + "learning_rate": 1.642857142857143e-05, + "loss": 1.8418, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 24.375411987304688, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.8428, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 19.64323616027832, + "learning_rate": 1.63265306122449e-05, + "loss": 1.9194, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 22.459064483642578, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.6649, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 36.789764404296875, + "learning_rate": 1.6224489795918368e-05, + "loss": 2.0131, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 22.109119415283203, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.9603, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 19.196834564208984, + "learning_rate": 1.612244897959184e-05, + "loss": 2.0538, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 26.870800018310547, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.9168, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 35.190696716308594, + "learning_rate": 1.6020408163265308e-05, + "loss": 2.0149, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 19.963472366333008, + "learning_rate": 1.596938775510204e-05, + "loss": 1.7871, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 20.292407989501953, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.944, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 20.55329132080078, + "learning_rate": 1.586734693877551e-05, + "loss": 2.0175, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 17.27350616455078, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.9308, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 22.471134185791016, + "learning_rate": 1.576530612244898e-05, + "loss": 1.9221, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 25.098316192626953, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.9359, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 25.125213623046875, + "learning_rate": 1.566326530612245e-05, + "loss": 2.0087, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 20.038599014282227, + "learning_rate": 1.5612244897959187e-05, + "loss": 2.0939, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 19.016841888427734, + "learning_rate": 1.556122448979592e-05, + "loss": 2.0183, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 21.97820472717285, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.8239, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 25.578901290893555, + "learning_rate": 1.545918367346939e-05, + "loss": 1.9388, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 23.74614143371582, + "learning_rate": 1.5408163265306123e-05, + "loss": 2.0492, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 22.203304290771484, + "learning_rate": 1.535714285714286e-05, + "loss": 1.941, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 21.39324188232422, + "learning_rate": 1.530612244897959e-05, + "loss": 1.9042, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 18.99315643310547, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.88, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 24.22341537475586, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.8147, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.8966256380081177, + "eval_runtime": 6.9787, + "eval_samples_per_second": 101.022, + "eval_steps_per_second": 50.583, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 18.296152114868164, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.8605, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 26.404766082763672, + "learning_rate": 1.510204081632653e-05, + "loss": 2.1195, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 19.187122344970703, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.9284, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 20.79934310913086, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.725, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 23.833288192749023, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.9215, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 22.301727294921875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.8728, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 23.685596466064453, + "learning_rate": 1.4846938775510204e-05, + "loss": 2.0482, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 18.969186782836914, + "learning_rate": 1.479591836734694e-05, + "loss": 1.8724, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 23.994483947753906, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.9542, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 16.84621238708496, + "learning_rate": 1.469387755102041e-05, + "loss": 1.9703, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 23.411087036132812, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.9836, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 29.55487632751465, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.9124, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 32.28921127319336, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.8566, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 24.007558822631836, + "learning_rate": 1.448979591836735e-05, + "loss": 1.8296, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 26.753524780273438, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.7181, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 22.49270248413086, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.8741, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 28.006656646728516, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.9151, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 17.606775283813477, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.9654, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 29.94802474975586, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.8849, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 28.27743148803711, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.8006, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 19.11652183532715, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.8539, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 24.255807876586914, + "learning_rate": 1.4081632653061225e-05, + "loss": 2.0162, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 22.508352279663086, + "learning_rate": 1.403061224489796e-05, + "loss": 1.858, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 27.028772354125977, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.8175, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 22.697704315185547, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.9789, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 31.604068756103516, + "learning_rate": 1.3877551020408165e-05, + "loss": 2.0039, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 27.71053695678711, + "learning_rate": 1.38265306122449e-05, + "loss": 1.9867, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 17.37586784362793, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.6931, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 20.28536605834961, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.9199, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 29.4377384185791, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.9322, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 17.703046798706055, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.8842, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 30.14008140563965, + "learning_rate": 1.3571428571428574e-05, + "loss": 2.1228, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 29.657262802124023, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.8929, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 18.243854522705078, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.8811, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 33.22247314453125, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.9814, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 26.413856506347656, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.9329, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 20.56089210510254, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.8944, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 19.480737686157227, + "learning_rate": 1.326530612244898e-05, + "loss": 1.9221, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 22.788074493408203, + "learning_rate": 1.3214285714285716e-05, + "loss": 2.0445, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 20.722291946411133, + "learning_rate": 1.316326530612245e-05, + "loss": 1.9998, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 25.190189361572266, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.8076, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 23.203886032104492, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.7821, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 25.32374382019043, + "learning_rate": 1.3010204081632653e-05, + "loss": 2.0356, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 28.798864364624023, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.8202, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 24.93810272216797, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.9237, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 36.78353500366211, + "learning_rate": 1.2857142857142859e-05, + "loss": 2.0019, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 28.510663986206055, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.9268, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 38.19087219238281, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.9366, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 20.796728134155273, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.8731, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 23.036758422851562, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.8835, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 27.058195114135742, + "learning_rate": 1.260204081632653e-05, + "loss": 1.8013, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 25.390460968017578, + "learning_rate": 1.2551020408163267e-05, + "loss": 2.0623, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 27.993654251098633, + "learning_rate": 1.25e-05, + "loss": 1.6895, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 24.15807342529297, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.9799, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 24.369815826416016, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.9687, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 24.572988510131836, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.8607, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 20.491390228271484, + "learning_rate": 1.229591836734694e-05, + "loss": 2.0677, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 25.128101348876953, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.9233, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 18.843276977539062, + "learning_rate": 1.219387755102041e-05, + "loss": 1.781, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 24.99994659423828, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.962, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 20.679218292236328, + "learning_rate": 1.2091836734693878e-05, + "loss": 2.0055, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 26.00550651550293, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.9761, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 33.80900192260742, + "learning_rate": 1.1989795918367348e-05, + "loss": 2.0502, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 25.639009475708008, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.9088, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 17.48627471923828, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.9359, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 23.16074562072754, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.8647, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 25.39946174621582, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.8523, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 25.8050537109375, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.8403, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 20.019033432006836, + "learning_rate": 1.1683673469387755e-05, + "loss": 2.0023, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 26.194847106933594, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.9429, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 21.064212799072266, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.8302, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 21.876129150390625, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.8881, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 33.61103439331055, + "learning_rate": 1.1479591836734697e-05, + "loss": 2.0497, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 27.204744338989258, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.8431, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 21.605751037597656, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.9149, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 30.307472229003906, + "learning_rate": 1.1326530612244899e-05, + "loss": 2.0313, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 23.69244384765625, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.8676, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 25.619901657104492, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.7905, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 28.0296573638916, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.8567, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 36.4359130859375, + "learning_rate": 1.1122448979591838e-05, + "loss": 2.115, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 26.91726303100586, + "learning_rate": 1.1071428571428572e-05, + "loss": 2.0642, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 23.085880279541016, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.9109, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 27.870641708374023, + "learning_rate": 1.096938775510204e-05, + "loss": 1.8999, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 32.0672607421875, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.904, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 28.879365921020508, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.7159, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 27.592771530151367, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.8561, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 27.412763595581055, + "learning_rate": 1.076530612244898e-05, + "loss": 1.9282, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 30.12356185913086, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.8726, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 39.9027214050293, + "learning_rate": 1.066326530612245e-05, + "loss": 1.7551, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 30.483945846557617, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.7643, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 26.00415802001953, + "learning_rate": 1.0561224489795918e-05, + "loss": 2.0552, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 23.03282356262207, + "learning_rate": 1.0510204081632654e-05, + "loss": 2.0052, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 33.653221130371094, + "learning_rate": 1.045918367346939e-05, + "loss": 1.7367, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 39.59351348876953, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.9726, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.77714920043945, + "learning_rate": 1.0357142857142859e-05, + "loss": 2.0906, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 33.194549560546875, + "learning_rate": 1.0306122448979591e-05, + "loss": 2.0742, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 25.10793685913086, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.9204, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 40.048404693603516, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.7775, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 26.085933685302734, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.9459, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 18.375, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.9536, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.8626214265823364, + "eval_runtime": 6.6508, + "eval_samples_per_second": 106.002, + "eval_steps_per_second": 53.076, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 33.858341217041016, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.8191, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 22.895992279052734, + "learning_rate": 1e-05, + "loss": 2.0596, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 30.55072593688965, + "learning_rate": 9.948979591836737e-06, + "loss": 1.8904, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 26.542705535888672, + "learning_rate": 9.89795918367347e-06, + "loss": 1.9683, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 39.81034851074219, + "learning_rate": 9.846938775510205e-06, + "loss": 1.9726, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 22.0065860748291, + "learning_rate": 9.795918367346939e-06, + "loss": 2.0575, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 19.012041091918945, + "learning_rate": 9.744897959183674e-06, + "loss": 1.8431, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 39.699974060058594, + "learning_rate": 9.693877551020408e-06, + "loss": 1.8231, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 21.391319274902344, + "learning_rate": 9.642857142857144e-06, + "loss": 1.7939, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 25.8063907623291, + "learning_rate": 9.591836734693878e-06, + "loss": 2.0366, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 20.598569869995117, + "learning_rate": 9.540816326530612e-06, + "loss": 1.8323, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 29.391401290893555, + "learning_rate": 9.489795918367348e-06, + "loss": 2.0052, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 24.39499855041504, + "learning_rate": 9.438775510204082e-06, + "loss": 1.8461, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 24.16887092590332, + "learning_rate": 9.387755102040818e-06, + "loss": 1.9404, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 24.577871322631836, + "learning_rate": 9.336734693877552e-06, + "loss": 1.9202, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 26.117361068725586, + "learning_rate": 9.285714285714288e-06, + "loss": 1.921, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 22.586837768554688, + "learning_rate": 9.234693877551022e-06, + "loss": 1.9692, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 18.438722610473633, + "learning_rate": 9.183673469387756e-06, + "loss": 1.9496, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 22.94545555114746, + "learning_rate": 9.13265306122449e-06, + "loss": 1.991, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 28.664562225341797, + "learning_rate": 9.081632653061225e-06, + "loss": 1.8352, + "step": 220 + }, + { + "epoch": 0.5573770491803278, + "grad_norm": 25.63576316833496, + "learning_rate": 9.03061224489796e-06, + "loss": 1.9399, + "step": 221 + }, + { + "epoch": 0.5598991172761665, + "grad_norm": 21.650251388549805, + "learning_rate": 8.979591836734695e-06, + "loss": 1.9565, + "step": 222 + }, + { + "epoch": 0.562421185372005, + "grad_norm": 29.605735778808594, + "learning_rate": 8.92857142857143e-06, + "loss": 1.729, + "step": 223 + }, + { + "epoch": 0.5649432534678437, + "grad_norm": 23.98230743408203, + "learning_rate": 8.877551020408163e-06, + "loss": 1.9399, + "step": 224 + }, + { + "epoch": 0.5674653215636822, + "grad_norm": 20.37510108947754, + "learning_rate": 8.826530612244899e-06, + "loss": 1.7322, + "step": 225 + }, + { + "epoch": 0.5699873896595208, + "grad_norm": 25.876188278198242, + "learning_rate": 8.775510204081633e-06, + "loss": 1.9444, + "step": 226 + }, + { + "epoch": 0.5725094577553594, + "grad_norm": 32.07249069213867, + "learning_rate": 8.724489795918369e-06, + "loss": 1.9364, + "step": 227 + }, + { + "epoch": 0.575031525851198, + "grad_norm": 28.014524459838867, + "learning_rate": 8.673469387755103e-06, + "loss": 1.757, + "step": 228 + }, + { + "epoch": 0.5775535939470365, + "grad_norm": 30.82647132873535, + "learning_rate": 8.622448979591837e-06, + "loss": 1.9067, + "step": 229 + }, + { + "epoch": 0.5800756620428752, + "grad_norm": 30.651660919189453, + "learning_rate": 8.571428571428571e-06, + "loss": 1.9906, + "step": 230 + }, + { + "epoch": 0.5825977301387137, + "grad_norm": 25.239904403686523, + "learning_rate": 8.520408163265307e-06, + "loss": 1.8914, + "step": 231 + }, + { + "epoch": 0.5851197982345523, + "grad_norm": 21.33747673034668, + "learning_rate": 8.469387755102042e-06, + "loss": 1.9999, + "step": 232 + }, + { + "epoch": 0.587641866330391, + "grad_norm": 25.255064010620117, + "learning_rate": 8.418367346938776e-06, + "loss": 1.8941, + "step": 233 + }, + { + "epoch": 0.5901639344262295, + "grad_norm": 24.443973541259766, + "learning_rate": 8.36734693877551e-06, + "loss": 1.7679, + "step": 234 + }, + { + "epoch": 0.592686002522068, + "grad_norm": 25.473894119262695, + "learning_rate": 8.316326530612246e-06, + "loss": 1.7876, + "step": 235 + }, + { + "epoch": 0.5952080706179067, + "grad_norm": 26.28467559814453, + "learning_rate": 8.26530612244898e-06, + "loss": 1.6761, + "step": 236 + }, + { + "epoch": 0.5977301387137453, + "grad_norm": 24.488052368164062, + "learning_rate": 8.214285714285714e-06, + "loss": 2.0022, + "step": 237 + }, + { + "epoch": 0.6002522068095839, + "grad_norm": 30.074064254760742, + "learning_rate": 8.16326530612245e-06, + "loss": 1.7747, + "step": 238 + }, + { + "epoch": 0.6027742749054225, + "grad_norm": 23.73440170288086, + "learning_rate": 8.112244897959184e-06, + "loss": 1.8468, + "step": 239 + }, + { + "epoch": 0.605296343001261, + "grad_norm": 22.338869094848633, + "learning_rate": 8.06122448979592e-06, + "loss": 1.8611, + "step": 240 + }, + { + "epoch": 0.6078184110970997, + "grad_norm": 24.844266891479492, + "learning_rate": 8.010204081632654e-06, + "loss": 1.9285, + "step": 241 + }, + { + "epoch": 0.6103404791929382, + "grad_norm": 29.65668487548828, + "learning_rate": 7.959183673469388e-06, + "loss": 1.935, + "step": 242 + }, + { + "epoch": 0.6128625472887768, + "grad_norm": 26.01723289489746, + "learning_rate": 7.908163265306124e-06, + "loss": 1.7587, + "step": 243 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 27.04817771911621, + "learning_rate": 7.857142857142858e-06, + "loss": 1.8878, + "step": 244 + }, + { + "epoch": 0.617906683480454, + "grad_norm": 36.23786163330078, + "learning_rate": 7.806122448979593e-06, + "loss": 1.992, + "step": 245 + }, + { + "epoch": 0.6204287515762925, + "grad_norm": 19.283294677734375, + "learning_rate": 7.755102040816327e-06, + "loss": 1.8066, + "step": 246 + }, + { + "epoch": 0.6229508196721312, + "grad_norm": 24.24143409729004, + "learning_rate": 7.704081632653061e-06, + "loss": 1.8899, + "step": 247 + }, + { + "epoch": 0.6254728877679697, + "grad_norm": 25.59832763671875, + "learning_rate": 7.653061224489796e-06, + "loss": 1.9601, + "step": 248 + }, + { + "epoch": 0.6279949558638083, + "grad_norm": 27.195640563964844, + "learning_rate": 7.602040816326531e-06, + "loss": 1.9561, + "step": 249 + }, + { + "epoch": 0.6305170239596469, + "grad_norm": 27.854570388793945, + "learning_rate": 7.551020408163265e-06, + "loss": 1.8781, + "step": 250 + }, + { + "epoch": 0.6330390920554855, + "grad_norm": 25.715761184692383, + "learning_rate": 7.500000000000001e-06, + "loss": 1.8542, + "step": 251 + }, + { + "epoch": 0.6355611601513241, + "grad_norm": 22.562984466552734, + "learning_rate": 7.448979591836736e-06, + "loss": 1.7681, + "step": 252 + }, + { + "epoch": 0.6380832282471627, + "grad_norm": 20.540348052978516, + "learning_rate": 7.39795918367347e-06, + "loss": 1.9617, + "step": 253 + }, + { + "epoch": 0.6406052963430012, + "grad_norm": 24.610937118530273, + "learning_rate": 7.346938775510205e-06, + "loss": 1.9694, + "step": 254 + }, + { + "epoch": 0.6431273644388399, + "grad_norm": 27.93538475036621, + "learning_rate": 7.295918367346939e-06, + "loss": 1.8858, + "step": 255 + }, + { + "epoch": 0.6456494325346784, + "grad_norm": 31.466445922851562, + "learning_rate": 7.244897959183675e-06, + "loss": 1.8252, + "step": 256 + }, + { + "epoch": 0.648171500630517, + "grad_norm": 26.276226043701172, + "learning_rate": 7.193877551020409e-06, + "loss": 1.8865, + "step": 257 + }, + { + "epoch": 0.6506935687263556, + "grad_norm": 22.52095603942871, + "learning_rate": 7.1428571428571436e-06, + "loss": 1.7649, + "step": 258 + }, + { + "epoch": 0.6532156368221942, + "grad_norm": 20.15144157409668, + "learning_rate": 7.091836734693878e-06, + "loss": 2.0158, + "step": 259 + }, + { + "epoch": 0.6557377049180327, + "grad_norm": 26.405349731445312, + "learning_rate": 7.0408163265306125e-06, + "loss": 1.8932, + "step": 260 + }, + { + "epoch": 0.6582597730138714, + "grad_norm": 32.94384765625, + "learning_rate": 6.989795918367348e-06, + "loss": 1.7795, + "step": 261 + }, + { + "epoch": 0.6607818411097099, + "grad_norm": 23.109092712402344, + "learning_rate": 6.938775510204082e-06, + "loss": 1.8383, + "step": 262 + }, + { + "epoch": 0.6633039092055486, + "grad_norm": 21.75737190246582, + "learning_rate": 6.887755102040817e-06, + "loss": 1.8727, + "step": 263 + }, + { + "epoch": 0.6658259773013872, + "grad_norm": 22.96916389465332, + "learning_rate": 6.836734693877551e-06, + "loss": 1.8544, + "step": 264 + }, + { + "epoch": 0.6683480453972257, + "grad_norm": 25.62445640563965, + "learning_rate": 6.785714285714287e-06, + "loss": 1.7503, + "step": 265 + }, + { + "epoch": 0.6708701134930644, + "grad_norm": 25.430530548095703, + "learning_rate": 6.734693877551021e-06, + "loss": 1.7938, + "step": 266 + }, + { + "epoch": 0.6733921815889029, + "grad_norm": 26.462881088256836, + "learning_rate": 6.683673469387756e-06, + "loss": 1.8284, + "step": 267 + }, + { + "epoch": 0.6759142496847415, + "grad_norm": 31.45004653930664, + "learning_rate": 6.63265306122449e-06, + "loss": 2.0328, + "step": 268 + }, + { + "epoch": 0.6784363177805801, + "grad_norm": 30.525737762451172, + "learning_rate": 6.581632653061225e-06, + "loss": 1.8192, + "step": 269 + }, + { + "epoch": 0.6809583858764187, + "grad_norm": 25.705707550048828, + "learning_rate": 6.530612244897959e-06, + "loss": 1.8533, + "step": 270 + }, + { + "epoch": 0.6834804539722572, + "grad_norm": 39.90187454223633, + "learning_rate": 6.4795918367346946e-06, + "loss": 1.9483, + "step": 271 + }, + { + "epoch": 0.6860025220680959, + "grad_norm": 28.0180721282959, + "learning_rate": 6.4285714285714295e-06, + "loss": 1.8132, + "step": 272 + }, + { + "epoch": 0.6885245901639344, + "grad_norm": 34.821372985839844, + "learning_rate": 6.3775510204081635e-06, + "loss": 1.9599, + "step": 273 + }, + { + "epoch": 0.691046658259773, + "grad_norm": 24.018394470214844, + "learning_rate": 6.326530612244899e-06, + "loss": 1.9248, + "step": 274 + }, + { + "epoch": 0.6935687263556116, + "grad_norm": 24.074344635009766, + "learning_rate": 6.275510204081633e-06, + "loss": 2.0148, + "step": 275 + }, + { + "epoch": 0.6960907944514502, + "grad_norm": 31.1939754486084, + "learning_rate": 6.224489795918368e-06, + "loss": 1.8959, + "step": 276 + }, + { + "epoch": 0.6986128625472888, + "grad_norm": 25.481502532958984, + "learning_rate": 6.173469387755102e-06, + "loss": 1.9832, + "step": 277 + }, + { + "epoch": 0.7011349306431274, + "grad_norm": 29.6664981842041, + "learning_rate": 6.122448979591837e-06, + "loss": 1.9222, + "step": 278 + }, + { + "epoch": 0.7036569987389659, + "grad_norm": 26.30698585510254, + "learning_rate": 6.071428571428571e-06, + "loss": 1.9897, + "step": 279 + }, + { + "epoch": 0.7061790668348046, + "grad_norm": 31.827558517456055, + "learning_rate": 6.020408163265307e-06, + "loss": 1.8615, + "step": 280 + }, + { + "epoch": 0.7087011349306431, + "grad_norm": 24.80223846435547, + "learning_rate": 5.969387755102042e-06, + "loss": 1.9579, + "step": 281 + }, + { + "epoch": 0.7112232030264817, + "grad_norm": 36.134700775146484, + "learning_rate": 5.918367346938776e-06, + "loss": 1.723, + "step": 282 + }, + { + "epoch": 0.7137452711223203, + "grad_norm": 30.388233184814453, + "learning_rate": 5.867346938775511e-06, + "loss": 1.9736, + "step": 283 + }, + { + "epoch": 0.7162673392181589, + "grad_norm": 32.231563568115234, + "learning_rate": 5.816326530612246e-06, + "loss": 1.9228, + "step": 284 + }, + { + "epoch": 0.7187894073139974, + "grad_norm": 38.05869674682617, + "learning_rate": 5.7653061224489805e-06, + "loss": 1.9159, + "step": 285 + }, + { + "epoch": 0.7213114754098361, + "grad_norm": 27.256147384643555, + "learning_rate": 5.7142857142857145e-06, + "loss": 1.8072, + "step": 286 + }, + { + "epoch": 0.7238335435056746, + "grad_norm": 25.67181396484375, + "learning_rate": 5.663265306122449e-06, + "loss": 1.8633, + "step": 287 + }, + { + "epoch": 0.7263556116015133, + "grad_norm": 31.8681697845459, + "learning_rate": 5.6122448979591834e-06, + "loss": 1.9822, + "step": 288 + }, + { + "epoch": 0.7288776796973518, + "grad_norm": 32.85325241088867, + "learning_rate": 5.561224489795919e-06, + "loss": 1.8166, + "step": 289 + }, + { + "epoch": 0.7313997477931904, + "grad_norm": 35.64312744140625, + "learning_rate": 5.510204081632653e-06, + "loss": 1.7166, + "step": 290 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4234748255109120.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-290/training_args.bin b/checkpoints/checkpoint-290/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..866f8d7e39db21daf30a11de7c14a3ccb8f2e9aa --- /dev/null +++ b/checkpoints/checkpoint-290/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:591380c2089627b1aa728bfd52abdb50afeaafc7a7f48353dede4e443fc370d0 +size 5969 diff --git a/checkpoints/checkpoint-30/README.md b/checkpoints/checkpoint-30/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b974a935220f493e52a3da346206a652479b4735 --- /dev/null +++ b/checkpoints/checkpoint-30/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.2-1B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-30/adapter_config.json b/checkpoints/checkpoint-30/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a0d31afa2216e2d6d2237fc15eef0c8c03ca674 --- /dev/null +++ b/checkpoints/checkpoint-30/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.2-1B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-30/adapter_model.safetensors b/checkpoints/checkpoint-30/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cde85ac07363dd8baf4a344c54673af1b3e24a77 --- /dev/null +++ b/checkpoints/checkpoint-30/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91326fbd3f1c3254a80a78f44a18dc6823cc40c00e83c6e77caaa1cb5062d95d +size 41248 diff --git a/checkpoints/checkpoint-30/chat_template.jinja b/checkpoints/checkpoint-30/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..1bad6a0f648dccdbec523ca79ba90fbcfc806af0 --- /dev/null +++ b/checkpoints/checkpoint-30/chat_template.jinja @@ -0,0 +1,93 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- if strftime_now is defined %} + {%- set date_string = strftime_now("%d %b %Y") %} + {%- else %} + {%- set date_string = "26 Jul 2024" %} + {%- endif %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {{- "<|eot_id|>" }} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-30/optimizer.pt b/checkpoints/checkpoint-30/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..1a5327b8b3eb5b03b008ef3574c16cb183a96c5e --- /dev/null +++ b/checkpoints/checkpoint-30/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b341f2372620e40d6a598d596f2e1f4766cf647310e80136ef75c7a52a18340 +size 38889 diff --git a/checkpoints/checkpoint-30/rng_state.pth b/checkpoints/checkpoint-30/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..449028c1ecc80657ae434a40b29f1f2bdccb9195 --- /dev/null +++ b/checkpoints/checkpoint-30/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12e15e837284f30841feeb4cb11a4ca47e6e0a0d43907e64044c865959176390 +size 14581 diff --git a/checkpoints/checkpoint-30/scheduler.pt b/checkpoints/checkpoint-30/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2f73c8d6159216f64da7c65bb2f34688265f81cd --- /dev/null +++ b/checkpoints/checkpoint-30/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79bc3c8496017652eacbb97371112ed9f204687c83413137ac0ca43459c97aa1 +size 1465 diff --git a/checkpoints/checkpoint-30/special_tokens_map.json b/checkpoints/checkpoint-30/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-30/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-30/tokenizer.json b/checkpoints/checkpoint-30/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-30/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-30/tokenizer_config.json b/checkpoints/checkpoint-30/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-30/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-30/trainer_state.json b/checkpoints/checkpoint-30/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..935ab133ceba1eabe4295873cf55e21eaa637df2 --- /dev/null +++ b/checkpoints/checkpoint-30/trainer_state.json @@ -0,0 +1,244 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.07566204287515763, + "eval_steps": 100, + "global_step": 30, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 25.350475311279297, + "learning_rate": 0.0, + "loss": 2.5568, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 24.538068771362305, + "learning_rate": 4.000000000000001e-06, + "loss": 2.7748, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 23.780784606933594, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5911, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 24.780380249023438, + "learning_rate": 1.2e-05, + "loss": 2.8427, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 22.699949264526367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.709, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 22.106008529663086, + "learning_rate": 2e-05, + "loss": 2.5854, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 22.497045516967773, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.5427, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 27.103275299072266, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.6599, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 21.081985473632812, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.5145, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 25.964981079101562, + "learning_rate": 1.979591836734694e-05, + "loss": 2.4247, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 25.353195190429688, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.5092, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 18.94191551208496, + "learning_rate": 1.969387755102041e-05, + "loss": 2.4335, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 23.60140037536621, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.544, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 24.298965454101562, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.4987, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.745506286621094, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.4985, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 22.54330062866211, + "learning_rate": 1.948979591836735e-05, + "loss": 2.6892, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 21.46229362487793, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.3998, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 20.54530143737793, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.4244, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 18.8839111328125, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.3911, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.924652099609375, + "learning_rate": 1.928571428571429e-05, + "loss": 2.3588, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.996627807617188, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.3727, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 18.584613800048828, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2974, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 14.309200286865234, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.4843, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.074164390563965, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.4043, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 13.610542297363281, + "learning_rate": 1.903061224489796e-05, + "loss": 2.3762, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 15.666613578796387, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.3249, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 14.475164413452148, + "learning_rate": 1.892857142857143e-05, + "loss": 2.3317, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 16.231687545776367, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.5064, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 16.8968563079834, + "learning_rate": 1.88265306122449e-05, + "loss": 2.3932, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 17.74305534362793, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.3329, + "step": 30 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 435876730060800.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-30/training_args.bin b/checkpoints/checkpoint-30/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..866f8d7e39db21daf30a11de7c14a3ccb8f2e9aa --- /dev/null +++ b/checkpoints/checkpoint-30/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:591380c2089627b1aa728bfd52abdb50afeaafc7a7f48353dede4e443fc370d0 +size 5969 diff --git a/checkpoints/checkpoint-300/README.md b/checkpoints/checkpoint-300/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b974a935220f493e52a3da346206a652479b4735 --- /dev/null +++ b/checkpoints/checkpoint-300/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.2-1B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-300/adapter_config.json b/checkpoints/checkpoint-300/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a0d31afa2216e2d6d2237fc15eef0c8c03ca674 --- /dev/null +++ b/checkpoints/checkpoint-300/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.2-1B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-300/adapter_model.safetensors b/checkpoints/checkpoint-300/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8ed82528d08b83b4e5e043637ad2c49bf508d8c3 --- /dev/null +++ b/checkpoints/checkpoint-300/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35ad8a5c29ac65ba41010377bbbf4c2365dff97025a1313ff0bd565515b67484 +size 41248 diff --git a/checkpoints/checkpoint-300/chat_template.jinja b/checkpoints/checkpoint-300/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..1bad6a0f648dccdbec523ca79ba90fbcfc806af0 --- /dev/null +++ b/checkpoints/checkpoint-300/chat_template.jinja @@ -0,0 +1,93 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- if strftime_now is defined %} + {%- set date_string = strftime_now("%d %b %Y") %} + {%- else %} + {%- set date_string = "26 Jul 2024" %} + {%- endif %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {{- "<|eot_id|>" }} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-300/optimizer.pt b/checkpoints/checkpoint-300/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..959c4e2d8869ef95177e12a942b57f5b56710e0b --- /dev/null +++ b/checkpoints/checkpoint-300/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e81d3b226a2be25a5978bf6cb20035cf47a9272e281bde49f0119f1f2b3f14e5 +size 38953 diff --git a/checkpoints/checkpoint-300/rng_state.pth b/checkpoints/checkpoint-300/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c9d202f3cabb995444284e316dca48902b67f89a --- /dev/null +++ b/checkpoints/checkpoint-300/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f646d2c44059721e44eb93fd2af0841401f183d6ff9b2e067649f1f58cce0f09 +size 14581 diff --git a/checkpoints/checkpoint-300/scheduler.pt b/checkpoints/checkpoint-300/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..eb76a48f8ddea3bec9280e93c444bb1d071ddfc5 --- /dev/null +++ b/checkpoints/checkpoint-300/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:794e9a3813693a751baa493bd4c469b6123b00fae4ecae572ea47b8479d6a6ff +size 1465 diff --git a/checkpoints/checkpoint-300/special_tokens_map.json b/checkpoints/checkpoint-300/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-300/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-300/tokenizer.json b/checkpoints/checkpoint-300/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-300/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-300/tokenizer_config.json b/checkpoints/checkpoint-300/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-300/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-300/trainer_state.json b/checkpoints/checkpoint-300/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..fb24d71b457f81bc0472fd3bd97c1437d92f8b4e --- /dev/null +++ b/checkpoints/checkpoint-300/trainer_state.json @@ -0,0 +1,2158 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.7566204287515763, + "eval_steps": 100, + "global_step": 300, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 25.350475311279297, + "learning_rate": 0.0, + "loss": 2.5568, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 24.538068771362305, + "learning_rate": 4.000000000000001e-06, + "loss": 2.7748, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 23.780784606933594, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5911, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 24.780380249023438, + "learning_rate": 1.2e-05, + "loss": 2.8427, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 22.699949264526367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.709, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 22.106008529663086, + "learning_rate": 2e-05, + "loss": 2.5854, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 22.497045516967773, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.5427, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 27.103275299072266, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.6599, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 21.081985473632812, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.5145, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 25.964981079101562, + "learning_rate": 1.979591836734694e-05, + "loss": 2.4247, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 25.353195190429688, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.5092, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 18.94191551208496, + "learning_rate": 1.969387755102041e-05, + "loss": 2.4335, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 23.60140037536621, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.544, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 24.298965454101562, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.4987, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.745506286621094, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.4985, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 22.54330062866211, + "learning_rate": 1.948979591836735e-05, + "loss": 2.6892, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 21.46229362487793, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.3998, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 20.54530143737793, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.4244, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 18.8839111328125, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.3911, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.924652099609375, + "learning_rate": 1.928571428571429e-05, + "loss": 2.3588, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.996627807617188, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.3727, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 18.584613800048828, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2974, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 14.309200286865234, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.4843, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.074164390563965, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.4043, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 13.610542297363281, + "learning_rate": 1.903061224489796e-05, + "loss": 2.3762, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 15.666613578796387, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.3249, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 14.475164413452148, + "learning_rate": 1.892857142857143e-05, + "loss": 2.3317, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 16.231687545776367, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.5064, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 16.8968563079834, + "learning_rate": 1.88265306122449e-05, + "loss": 2.3932, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 17.74305534362793, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.3329, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 16.41620445251465, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.3982, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 17.965959548950195, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.4227, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 19.92589569091797, + "learning_rate": 1.862244897959184e-05, + "loss": 2.5255, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 20.62932586669922, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.1816, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 18.360614776611328, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.2827, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 19.199546813964844, + "learning_rate": 1.8469387755102043e-05, + "loss": 2.1498, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 22.727521896362305, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.3811, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 19.80649757385254, + "learning_rate": 1.836734693877551e-05, + "loss": 2.2342, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.24563217163086, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.2287, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 25.384042739868164, + "learning_rate": 1.826530612244898e-05, + "loss": 2.1259, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 23.417089462280273, + "learning_rate": 1.8214285714285715e-05, + "loss": 2.0858, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 27.639497756958008, + "learning_rate": 1.816326530612245e-05, + "loss": 2.2243, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 27.390850067138672, + "learning_rate": 1.8112244897959187e-05, + "loss": 2.1314, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 27.956937789916992, + "learning_rate": 1.806122448979592e-05, + "loss": 2.1755, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 32.09632873535156, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.2365, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 33.84647750854492, + "learning_rate": 1.795918367346939e-05, + "loss": 2.1671, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 32.027130126953125, + "learning_rate": 1.7908163265306123e-05, + "loss": 2.09, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 35.423587799072266, + "learning_rate": 1.785714285714286e-05, + "loss": 2.2479, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 31.041240692138672, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9687, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 28.790103912353516, + "learning_rate": 1.7755102040816327e-05, + "loss": 2.1428, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.089313507080078, + "learning_rate": 1.7704081632653062e-05, + "loss": 2.0673, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 26.493867874145508, + "learning_rate": 1.7653061224489798e-05, + "loss": 2.0814, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 19.993173599243164, + "learning_rate": 1.760204081632653e-05, + "loss": 2.005, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 21.89765167236328, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.2262, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 23.22844123840332, + "learning_rate": 1.7500000000000002e-05, + "loss": 2.0208, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 15.864526748657227, + "learning_rate": 1.7448979591836738e-05, + "loss": 2.0153, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 21.451187133789062, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.1386, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 18.089811325073242, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.9517, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 24.029157638549805, + "learning_rate": 1.729591836734694e-05, + "loss": 1.9719, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 18.722776412963867, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0623, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 20.211933135986328, + "learning_rate": 1.719387755102041e-05, + "loss": 2.0081, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 17.61188507080078, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.8484, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 20.118955612182617, + "learning_rate": 1.7091836734693878e-05, + "loss": 2.0799, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 17.271841049194336, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.9832, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 19.521392822265625, + "learning_rate": 1.698979591836735e-05, + "loss": 1.9129, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 22.660900115966797, + "learning_rate": 1.6938775510204085e-05, + "loss": 2.118, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 17.332427978515625, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.9632, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 22.42765998840332, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.954, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 23.6208553314209, + "learning_rate": 1.678571428571429e-05, + "loss": 1.9917, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 19.78505516052246, + "learning_rate": 1.673469387755102e-05, + "loss": 1.7964, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 19.453041076660156, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.9587, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 24.731407165527344, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.9945, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 20.977611541748047, + "learning_rate": 1.6581632653061225e-05, + "loss": 2.0617, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 22.959585189819336, + "learning_rate": 1.653061224489796e-05, + "loss": 1.98, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 21.952653884887695, + "learning_rate": 1.6479591836734696e-05, + "loss": 2.1094, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 22.320383071899414, + "learning_rate": 1.642857142857143e-05, + "loss": 1.8418, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 24.375411987304688, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.8428, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 19.64323616027832, + "learning_rate": 1.63265306122449e-05, + "loss": 1.9194, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 22.459064483642578, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.6649, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 36.789764404296875, + "learning_rate": 1.6224489795918368e-05, + "loss": 2.0131, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 22.109119415283203, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.9603, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 19.196834564208984, + "learning_rate": 1.612244897959184e-05, + "loss": 2.0538, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 26.870800018310547, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.9168, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 35.190696716308594, + "learning_rate": 1.6020408163265308e-05, + "loss": 2.0149, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 19.963472366333008, + "learning_rate": 1.596938775510204e-05, + "loss": 1.7871, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 20.292407989501953, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.944, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 20.55329132080078, + "learning_rate": 1.586734693877551e-05, + "loss": 2.0175, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 17.27350616455078, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.9308, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 22.471134185791016, + "learning_rate": 1.576530612244898e-05, + "loss": 1.9221, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 25.098316192626953, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.9359, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 25.125213623046875, + "learning_rate": 1.566326530612245e-05, + "loss": 2.0087, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 20.038599014282227, + "learning_rate": 1.5612244897959187e-05, + "loss": 2.0939, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 19.016841888427734, + "learning_rate": 1.556122448979592e-05, + "loss": 2.0183, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 21.97820472717285, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.8239, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 25.578901290893555, + "learning_rate": 1.545918367346939e-05, + "loss": 1.9388, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 23.74614143371582, + "learning_rate": 1.5408163265306123e-05, + "loss": 2.0492, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 22.203304290771484, + "learning_rate": 1.535714285714286e-05, + "loss": 1.941, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 21.39324188232422, + "learning_rate": 1.530612244897959e-05, + "loss": 1.9042, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 18.99315643310547, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.88, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 24.22341537475586, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.8147, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.8966256380081177, + "eval_runtime": 6.9787, + "eval_samples_per_second": 101.022, + "eval_steps_per_second": 50.583, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 18.296152114868164, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.8605, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 26.404766082763672, + "learning_rate": 1.510204081632653e-05, + "loss": 2.1195, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 19.187122344970703, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.9284, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 20.79934310913086, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.725, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 23.833288192749023, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.9215, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 22.301727294921875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.8728, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 23.685596466064453, + "learning_rate": 1.4846938775510204e-05, + "loss": 2.0482, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 18.969186782836914, + "learning_rate": 1.479591836734694e-05, + "loss": 1.8724, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 23.994483947753906, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.9542, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 16.84621238708496, + "learning_rate": 1.469387755102041e-05, + "loss": 1.9703, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 23.411087036132812, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.9836, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 29.55487632751465, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.9124, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 32.28921127319336, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.8566, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 24.007558822631836, + "learning_rate": 1.448979591836735e-05, + "loss": 1.8296, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 26.753524780273438, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.7181, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 22.49270248413086, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.8741, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 28.006656646728516, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.9151, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 17.606775283813477, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.9654, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 29.94802474975586, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.8849, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 28.27743148803711, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.8006, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 19.11652183532715, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.8539, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 24.255807876586914, + "learning_rate": 1.4081632653061225e-05, + "loss": 2.0162, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 22.508352279663086, + "learning_rate": 1.403061224489796e-05, + "loss": 1.858, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 27.028772354125977, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.8175, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 22.697704315185547, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.9789, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 31.604068756103516, + "learning_rate": 1.3877551020408165e-05, + "loss": 2.0039, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 27.71053695678711, + "learning_rate": 1.38265306122449e-05, + "loss": 1.9867, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 17.37586784362793, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.6931, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 20.28536605834961, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.9199, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 29.4377384185791, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.9322, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 17.703046798706055, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.8842, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 30.14008140563965, + "learning_rate": 1.3571428571428574e-05, + "loss": 2.1228, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 29.657262802124023, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.8929, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 18.243854522705078, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.8811, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 33.22247314453125, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.9814, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 26.413856506347656, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.9329, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 20.56089210510254, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.8944, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 19.480737686157227, + "learning_rate": 1.326530612244898e-05, + "loss": 1.9221, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 22.788074493408203, + "learning_rate": 1.3214285714285716e-05, + "loss": 2.0445, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 20.722291946411133, + "learning_rate": 1.316326530612245e-05, + "loss": 1.9998, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 25.190189361572266, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.8076, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 23.203886032104492, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.7821, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 25.32374382019043, + "learning_rate": 1.3010204081632653e-05, + "loss": 2.0356, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 28.798864364624023, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.8202, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 24.93810272216797, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.9237, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 36.78353500366211, + "learning_rate": 1.2857142857142859e-05, + "loss": 2.0019, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 28.510663986206055, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.9268, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 38.19087219238281, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.9366, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 20.796728134155273, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.8731, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 23.036758422851562, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.8835, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 27.058195114135742, + "learning_rate": 1.260204081632653e-05, + "loss": 1.8013, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 25.390460968017578, + "learning_rate": 1.2551020408163267e-05, + "loss": 2.0623, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 27.993654251098633, + "learning_rate": 1.25e-05, + "loss": 1.6895, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 24.15807342529297, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.9799, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 24.369815826416016, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.9687, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 24.572988510131836, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.8607, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 20.491390228271484, + "learning_rate": 1.229591836734694e-05, + "loss": 2.0677, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 25.128101348876953, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.9233, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 18.843276977539062, + "learning_rate": 1.219387755102041e-05, + "loss": 1.781, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 24.99994659423828, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.962, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 20.679218292236328, + "learning_rate": 1.2091836734693878e-05, + "loss": 2.0055, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 26.00550651550293, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.9761, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 33.80900192260742, + "learning_rate": 1.1989795918367348e-05, + "loss": 2.0502, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 25.639009475708008, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.9088, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 17.48627471923828, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.9359, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 23.16074562072754, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.8647, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 25.39946174621582, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.8523, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 25.8050537109375, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.8403, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 20.019033432006836, + "learning_rate": 1.1683673469387755e-05, + "loss": 2.0023, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 26.194847106933594, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.9429, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 21.064212799072266, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.8302, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 21.876129150390625, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.8881, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 33.61103439331055, + "learning_rate": 1.1479591836734697e-05, + "loss": 2.0497, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 27.204744338989258, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.8431, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 21.605751037597656, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.9149, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 30.307472229003906, + "learning_rate": 1.1326530612244899e-05, + "loss": 2.0313, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 23.69244384765625, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.8676, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 25.619901657104492, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.7905, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 28.0296573638916, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.8567, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 36.4359130859375, + "learning_rate": 1.1122448979591838e-05, + "loss": 2.115, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 26.91726303100586, + "learning_rate": 1.1071428571428572e-05, + "loss": 2.0642, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 23.085880279541016, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.9109, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 27.870641708374023, + "learning_rate": 1.096938775510204e-05, + "loss": 1.8999, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 32.0672607421875, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.904, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 28.879365921020508, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.7159, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 27.592771530151367, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.8561, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 27.412763595581055, + "learning_rate": 1.076530612244898e-05, + "loss": 1.9282, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 30.12356185913086, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.8726, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 39.9027214050293, + "learning_rate": 1.066326530612245e-05, + "loss": 1.7551, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 30.483945846557617, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.7643, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 26.00415802001953, + "learning_rate": 1.0561224489795918e-05, + "loss": 2.0552, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 23.03282356262207, + "learning_rate": 1.0510204081632654e-05, + "loss": 2.0052, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 33.653221130371094, + "learning_rate": 1.045918367346939e-05, + "loss": 1.7367, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 39.59351348876953, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.9726, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.77714920043945, + "learning_rate": 1.0357142857142859e-05, + "loss": 2.0906, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 33.194549560546875, + "learning_rate": 1.0306122448979591e-05, + "loss": 2.0742, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 25.10793685913086, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.9204, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 40.048404693603516, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.7775, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 26.085933685302734, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.9459, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 18.375, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.9536, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.8626214265823364, + "eval_runtime": 6.6508, + "eval_samples_per_second": 106.002, + "eval_steps_per_second": 53.076, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 33.858341217041016, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.8191, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 22.895992279052734, + "learning_rate": 1e-05, + "loss": 2.0596, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 30.55072593688965, + "learning_rate": 9.948979591836737e-06, + "loss": 1.8904, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 26.542705535888672, + "learning_rate": 9.89795918367347e-06, + "loss": 1.9683, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 39.81034851074219, + "learning_rate": 9.846938775510205e-06, + "loss": 1.9726, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 22.0065860748291, + "learning_rate": 9.795918367346939e-06, + "loss": 2.0575, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 19.012041091918945, + "learning_rate": 9.744897959183674e-06, + "loss": 1.8431, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 39.699974060058594, + "learning_rate": 9.693877551020408e-06, + "loss": 1.8231, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 21.391319274902344, + "learning_rate": 9.642857142857144e-06, + "loss": 1.7939, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 25.8063907623291, + "learning_rate": 9.591836734693878e-06, + "loss": 2.0366, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 20.598569869995117, + "learning_rate": 9.540816326530612e-06, + "loss": 1.8323, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 29.391401290893555, + "learning_rate": 9.489795918367348e-06, + "loss": 2.0052, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 24.39499855041504, + "learning_rate": 9.438775510204082e-06, + "loss": 1.8461, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 24.16887092590332, + "learning_rate": 9.387755102040818e-06, + "loss": 1.9404, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 24.577871322631836, + "learning_rate": 9.336734693877552e-06, + "loss": 1.9202, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 26.117361068725586, + "learning_rate": 9.285714285714288e-06, + "loss": 1.921, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 22.586837768554688, + "learning_rate": 9.234693877551022e-06, + "loss": 1.9692, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 18.438722610473633, + "learning_rate": 9.183673469387756e-06, + "loss": 1.9496, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 22.94545555114746, + "learning_rate": 9.13265306122449e-06, + "loss": 1.991, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 28.664562225341797, + "learning_rate": 9.081632653061225e-06, + "loss": 1.8352, + "step": 220 + }, + { + "epoch": 0.5573770491803278, + "grad_norm": 25.63576316833496, + "learning_rate": 9.03061224489796e-06, + "loss": 1.9399, + "step": 221 + }, + { + "epoch": 0.5598991172761665, + "grad_norm": 21.650251388549805, + "learning_rate": 8.979591836734695e-06, + "loss": 1.9565, + "step": 222 + }, + { + "epoch": 0.562421185372005, + "grad_norm": 29.605735778808594, + "learning_rate": 8.92857142857143e-06, + "loss": 1.729, + "step": 223 + }, + { + "epoch": 0.5649432534678437, + "grad_norm": 23.98230743408203, + "learning_rate": 8.877551020408163e-06, + "loss": 1.9399, + "step": 224 + }, + { + "epoch": 0.5674653215636822, + "grad_norm": 20.37510108947754, + "learning_rate": 8.826530612244899e-06, + "loss": 1.7322, + "step": 225 + }, + { + "epoch": 0.5699873896595208, + "grad_norm": 25.876188278198242, + "learning_rate": 8.775510204081633e-06, + "loss": 1.9444, + "step": 226 + }, + { + "epoch": 0.5725094577553594, + "grad_norm": 32.07249069213867, + "learning_rate": 8.724489795918369e-06, + "loss": 1.9364, + "step": 227 + }, + { + "epoch": 0.575031525851198, + "grad_norm": 28.014524459838867, + "learning_rate": 8.673469387755103e-06, + "loss": 1.757, + "step": 228 + }, + { + "epoch": 0.5775535939470365, + "grad_norm": 30.82647132873535, + "learning_rate": 8.622448979591837e-06, + "loss": 1.9067, + "step": 229 + }, + { + "epoch": 0.5800756620428752, + "grad_norm": 30.651660919189453, + "learning_rate": 8.571428571428571e-06, + "loss": 1.9906, + "step": 230 + }, + { + "epoch": 0.5825977301387137, + "grad_norm": 25.239904403686523, + "learning_rate": 8.520408163265307e-06, + "loss": 1.8914, + "step": 231 + }, + { + "epoch": 0.5851197982345523, + "grad_norm": 21.33747673034668, + "learning_rate": 8.469387755102042e-06, + "loss": 1.9999, + "step": 232 + }, + { + "epoch": 0.587641866330391, + "grad_norm": 25.255064010620117, + "learning_rate": 8.418367346938776e-06, + "loss": 1.8941, + "step": 233 + }, + { + "epoch": 0.5901639344262295, + "grad_norm": 24.443973541259766, + "learning_rate": 8.36734693877551e-06, + "loss": 1.7679, + "step": 234 + }, + { + "epoch": 0.592686002522068, + "grad_norm": 25.473894119262695, + "learning_rate": 8.316326530612246e-06, + "loss": 1.7876, + "step": 235 + }, + { + "epoch": 0.5952080706179067, + "grad_norm": 26.28467559814453, + "learning_rate": 8.26530612244898e-06, + "loss": 1.6761, + "step": 236 + }, + { + "epoch": 0.5977301387137453, + "grad_norm": 24.488052368164062, + "learning_rate": 8.214285714285714e-06, + "loss": 2.0022, + "step": 237 + }, + { + "epoch": 0.6002522068095839, + "grad_norm": 30.074064254760742, + "learning_rate": 8.16326530612245e-06, + "loss": 1.7747, + "step": 238 + }, + { + "epoch": 0.6027742749054225, + "grad_norm": 23.73440170288086, + "learning_rate": 8.112244897959184e-06, + "loss": 1.8468, + "step": 239 + }, + { + "epoch": 0.605296343001261, + "grad_norm": 22.338869094848633, + "learning_rate": 8.06122448979592e-06, + "loss": 1.8611, + "step": 240 + }, + { + "epoch": 0.6078184110970997, + "grad_norm": 24.844266891479492, + "learning_rate": 8.010204081632654e-06, + "loss": 1.9285, + "step": 241 + }, + { + "epoch": 0.6103404791929382, + "grad_norm": 29.65668487548828, + "learning_rate": 7.959183673469388e-06, + "loss": 1.935, + "step": 242 + }, + { + "epoch": 0.6128625472887768, + "grad_norm": 26.01723289489746, + "learning_rate": 7.908163265306124e-06, + "loss": 1.7587, + "step": 243 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 27.04817771911621, + "learning_rate": 7.857142857142858e-06, + "loss": 1.8878, + "step": 244 + }, + { + "epoch": 0.617906683480454, + "grad_norm": 36.23786163330078, + "learning_rate": 7.806122448979593e-06, + "loss": 1.992, + "step": 245 + }, + { + "epoch": 0.6204287515762925, + "grad_norm": 19.283294677734375, + "learning_rate": 7.755102040816327e-06, + "loss": 1.8066, + "step": 246 + }, + { + "epoch": 0.6229508196721312, + "grad_norm": 24.24143409729004, + "learning_rate": 7.704081632653061e-06, + "loss": 1.8899, + "step": 247 + }, + { + "epoch": 0.6254728877679697, + "grad_norm": 25.59832763671875, + "learning_rate": 7.653061224489796e-06, + "loss": 1.9601, + "step": 248 + }, + { + "epoch": 0.6279949558638083, + "grad_norm": 27.195640563964844, + "learning_rate": 7.602040816326531e-06, + "loss": 1.9561, + "step": 249 + }, + { + "epoch": 0.6305170239596469, + "grad_norm": 27.854570388793945, + "learning_rate": 7.551020408163265e-06, + "loss": 1.8781, + "step": 250 + }, + { + "epoch": 0.6330390920554855, + "grad_norm": 25.715761184692383, + "learning_rate": 7.500000000000001e-06, + "loss": 1.8542, + "step": 251 + }, + { + "epoch": 0.6355611601513241, + "grad_norm": 22.562984466552734, + "learning_rate": 7.448979591836736e-06, + "loss": 1.7681, + "step": 252 + }, + { + "epoch": 0.6380832282471627, + "grad_norm": 20.540348052978516, + "learning_rate": 7.39795918367347e-06, + "loss": 1.9617, + "step": 253 + }, + { + "epoch": 0.6406052963430012, + "grad_norm": 24.610937118530273, + "learning_rate": 7.346938775510205e-06, + "loss": 1.9694, + "step": 254 + }, + { + "epoch": 0.6431273644388399, + "grad_norm": 27.93538475036621, + "learning_rate": 7.295918367346939e-06, + "loss": 1.8858, + "step": 255 + }, + { + "epoch": 0.6456494325346784, + "grad_norm": 31.466445922851562, + "learning_rate": 7.244897959183675e-06, + "loss": 1.8252, + "step": 256 + }, + { + "epoch": 0.648171500630517, + "grad_norm": 26.276226043701172, + "learning_rate": 7.193877551020409e-06, + "loss": 1.8865, + "step": 257 + }, + { + "epoch": 0.6506935687263556, + "grad_norm": 22.52095603942871, + "learning_rate": 7.1428571428571436e-06, + "loss": 1.7649, + "step": 258 + }, + { + "epoch": 0.6532156368221942, + "grad_norm": 20.15144157409668, + "learning_rate": 7.091836734693878e-06, + "loss": 2.0158, + "step": 259 + }, + { + "epoch": 0.6557377049180327, + "grad_norm": 26.405349731445312, + "learning_rate": 7.0408163265306125e-06, + "loss": 1.8932, + "step": 260 + }, + { + "epoch": 0.6582597730138714, + "grad_norm": 32.94384765625, + "learning_rate": 6.989795918367348e-06, + "loss": 1.7795, + "step": 261 + }, + { + "epoch": 0.6607818411097099, + "grad_norm": 23.109092712402344, + "learning_rate": 6.938775510204082e-06, + "loss": 1.8383, + "step": 262 + }, + { + "epoch": 0.6633039092055486, + "grad_norm": 21.75737190246582, + "learning_rate": 6.887755102040817e-06, + "loss": 1.8727, + "step": 263 + }, + { + "epoch": 0.6658259773013872, + "grad_norm": 22.96916389465332, + "learning_rate": 6.836734693877551e-06, + "loss": 1.8544, + "step": 264 + }, + { + "epoch": 0.6683480453972257, + "grad_norm": 25.62445640563965, + "learning_rate": 6.785714285714287e-06, + "loss": 1.7503, + "step": 265 + }, + { + "epoch": 0.6708701134930644, + "grad_norm": 25.430530548095703, + "learning_rate": 6.734693877551021e-06, + "loss": 1.7938, + "step": 266 + }, + { + "epoch": 0.6733921815889029, + "grad_norm": 26.462881088256836, + "learning_rate": 6.683673469387756e-06, + "loss": 1.8284, + "step": 267 + }, + { + "epoch": 0.6759142496847415, + "grad_norm": 31.45004653930664, + "learning_rate": 6.63265306122449e-06, + "loss": 2.0328, + "step": 268 + }, + { + "epoch": 0.6784363177805801, + "grad_norm": 30.525737762451172, + "learning_rate": 6.581632653061225e-06, + "loss": 1.8192, + "step": 269 + }, + { + "epoch": 0.6809583858764187, + "grad_norm": 25.705707550048828, + "learning_rate": 6.530612244897959e-06, + "loss": 1.8533, + "step": 270 + }, + { + "epoch": 0.6834804539722572, + "grad_norm": 39.90187454223633, + "learning_rate": 6.4795918367346946e-06, + "loss": 1.9483, + "step": 271 + }, + { + "epoch": 0.6860025220680959, + "grad_norm": 28.0180721282959, + "learning_rate": 6.4285714285714295e-06, + "loss": 1.8132, + "step": 272 + }, + { + "epoch": 0.6885245901639344, + "grad_norm": 34.821372985839844, + "learning_rate": 6.3775510204081635e-06, + "loss": 1.9599, + "step": 273 + }, + { + "epoch": 0.691046658259773, + "grad_norm": 24.018394470214844, + "learning_rate": 6.326530612244899e-06, + "loss": 1.9248, + "step": 274 + }, + { + "epoch": 0.6935687263556116, + "grad_norm": 24.074344635009766, + "learning_rate": 6.275510204081633e-06, + "loss": 2.0148, + "step": 275 + }, + { + "epoch": 0.6960907944514502, + "grad_norm": 31.1939754486084, + "learning_rate": 6.224489795918368e-06, + "loss": 1.8959, + "step": 276 + }, + { + "epoch": 0.6986128625472888, + "grad_norm": 25.481502532958984, + "learning_rate": 6.173469387755102e-06, + "loss": 1.9832, + "step": 277 + }, + { + "epoch": 0.7011349306431274, + "grad_norm": 29.6664981842041, + "learning_rate": 6.122448979591837e-06, + "loss": 1.9222, + "step": 278 + }, + { + "epoch": 0.7036569987389659, + "grad_norm": 26.30698585510254, + "learning_rate": 6.071428571428571e-06, + "loss": 1.9897, + "step": 279 + }, + { + "epoch": 0.7061790668348046, + "grad_norm": 31.827558517456055, + "learning_rate": 6.020408163265307e-06, + "loss": 1.8615, + "step": 280 + }, + { + "epoch": 0.7087011349306431, + "grad_norm": 24.80223846435547, + "learning_rate": 5.969387755102042e-06, + "loss": 1.9579, + "step": 281 + }, + { + "epoch": 0.7112232030264817, + "grad_norm": 36.134700775146484, + "learning_rate": 5.918367346938776e-06, + "loss": 1.723, + "step": 282 + }, + { + "epoch": 0.7137452711223203, + "grad_norm": 30.388233184814453, + "learning_rate": 5.867346938775511e-06, + "loss": 1.9736, + "step": 283 + }, + { + "epoch": 0.7162673392181589, + "grad_norm": 32.231563568115234, + "learning_rate": 5.816326530612246e-06, + "loss": 1.9228, + "step": 284 + }, + { + "epoch": 0.7187894073139974, + "grad_norm": 38.05869674682617, + "learning_rate": 5.7653061224489805e-06, + "loss": 1.9159, + "step": 285 + }, + { + "epoch": 0.7213114754098361, + "grad_norm": 27.256147384643555, + "learning_rate": 5.7142857142857145e-06, + "loss": 1.8072, + "step": 286 + }, + { + "epoch": 0.7238335435056746, + "grad_norm": 25.67181396484375, + "learning_rate": 5.663265306122449e-06, + "loss": 1.8633, + "step": 287 + }, + { + "epoch": 0.7263556116015133, + "grad_norm": 31.8681697845459, + "learning_rate": 5.6122448979591834e-06, + "loss": 1.9822, + "step": 288 + }, + { + "epoch": 0.7288776796973518, + "grad_norm": 32.85325241088867, + "learning_rate": 5.561224489795919e-06, + "loss": 1.8166, + "step": 289 + }, + { + "epoch": 0.7313997477931904, + "grad_norm": 35.64312744140625, + "learning_rate": 5.510204081632653e-06, + "loss": 1.7166, + "step": 290 + }, + { + "epoch": 0.733921815889029, + "grad_norm": 24.276235580444336, + "learning_rate": 5.459183673469388e-06, + "loss": 1.7593, + "step": 291 + }, + { + "epoch": 0.7364438839848676, + "grad_norm": 29.371950149536133, + "learning_rate": 5.408163265306123e-06, + "loss": 1.8124, + "step": 292 + }, + { + "epoch": 0.7389659520807061, + "grad_norm": 23.76220703125, + "learning_rate": 5.357142857142857e-06, + "loss": 1.7775, + "step": 293 + }, + { + "epoch": 0.7414880201765448, + "grad_norm": 37.103050231933594, + "learning_rate": 5.306122448979593e-06, + "loss": 1.9253, + "step": 294 + }, + { + "epoch": 0.7440100882723834, + "grad_norm": 20.0811767578125, + "learning_rate": 5.255102040816327e-06, + "loss": 1.8711, + "step": 295 + }, + { + "epoch": 0.7465321563682219, + "grad_norm": 35.33123016357422, + "learning_rate": 5.204081632653062e-06, + "loss": 1.8764, + "step": 296 + }, + { + "epoch": 0.7490542244640606, + "grad_norm": 31.880672454833984, + "learning_rate": 5.153061224489796e-06, + "loss": 1.8929, + "step": 297 + }, + { + "epoch": 0.7515762925598991, + "grad_norm": 21.682334899902344, + "learning_rate": 5.1020408163265315e-06, + "loss": 2.0377, + "step": 298 + }, + { + "epoch": 0.7540983606557377, + "grad_norm": 34.68608474731445, + "learning_rate": 5.0510204081632655e-06, + "loss": 1.9341, + "step": 299 + }, + { + "epoch": 0.7566204287515763, + "grad_norm": 25.59632110595703, + "learning_rate": 5e-06, + "loss": 1.8264, + "step": 300 + }, + { + "epoch": 0.7566204287515763, + "eval_loss": 1.8502724170684814, + "eval_runtime": 6.6208, + "eval_samples_per_second": 106.483, + "eval_steps_per_second": 53.317, + "step": 300 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4379483853029376.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-300/training_args.bin b/checkpoints/checkpoint-300/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..866f8d7e39db21daf30a11de7c14a3ccb8f2e9aa --- /dev/null +++ b/checkpoints/checkpoint-300/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:591380c2089627b1aa728bfd52abdb50afeaafc7a7f48353dede4e443fc370d0 +size 5969 diff --git a/checkpoints/checkpoint-310/README.md b/checkpoints/checkpoint-310/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b974a935220f493e52a3da346206a652479b4735 --- /dev/null +++ b/checkpoints/checkpoint-310/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.2-1B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-310/adapter_config.json b/checkpoints/checkpoint-310/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a0d31afa2216e2d6d2237fc15eef0c8c03ca674 --- /dev/null +++ b/checkpoints/checkpoint-310/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.2-1B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-310/adapter_model.safetensors b/checkpoints/checkpoint-310/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..aca216eb2dbd69b3a42d375f287046125d7b4ae1 --- /dev/null +++ b/checkpoints/checkpoint-310/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:413888f40803140c88c59710924a8da126816f13d60f56a402b2a42573e6b960 +size 41248 diff --git a/checkpoints/checkpoint-310/chat_template.jinja b/checkpoints/checkpoint-310/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..1bad6a0f648dccdbec523ca79ba90fbcfc806af0 --- /dev/null +++ b/checkpoints/checkpoint-310/chat_template.jinja @@ -0,0 +1,93 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- if strftime_now is defined %} + {%- set date_string = strftime_now("%d %b %Y") %} + {%- else %} + {%- set date_string = "26 Jul 2024" %} + {%- endif %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {{- "<|eot_id|>" }} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-310/optimizer.pt b/checkpoints/checkpoint-310/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..d79e63ff64f82515d08e3c5101bcf2c71b145d70 --- /dev/null +++ b/checkpoints/checkpoint-310/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cf37464a47554ce95a274a4963f42ab2281af29c031b7a20a04f1c3b400141f +size 38953 diff --git a/checkpoints/checkpoint-310/rng_state.pth b/checkpoints/checkpoint-310/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c9d202f3cabb995444284e316dca48902b67f89a --- /dev/null +++ b/checkpoints/checkpoint-310/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f646d2c44059721e44eb93fd2af0841401f183d6ff9b2e067649f1f58cce0f09 +size 14581 diff --git a/checkpoints/checkpoint-310/scheduler.pt b/checkpoints/checkpoint-310/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..593ded785741662a028f56717508348ff3096e44 --- /dev/null +++ b/checkpoints/checkpoint-310/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:587894a39cc3c150059fb29c66e53bf8aa6882d85638d646e4af818a3032f3d4 +size 1465 diff --git a/checkpoints/checkpoint-310/special_tokens_map.json b/checkpoints/checkpoint-310/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-310/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-310/tokenizer.json b/checkpoints/checkpoint-310/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-310/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-310/tokenizer_config.json b/checkpoints/checkpoint-310/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-310/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-310/trainer_state.json b/checkpoints/checkpoint-310/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3012af4dcecb8da8c830462a331b8678717e7703 --- /dev/null +++ b/checkpoints/checkpoint-310/trainer_state.json @@ -0,0 +1,2228 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.7818411097099621, + "eval_steps": 100, + "global_step": 310, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 25.350475311279297, + "learning_rate": 0.0, + "loss": 2.5568, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 24.538068771362305, + "learning_rate": 4.000000000000001e-06, + "loss": 2.7748, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 23.780784606933594, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5911, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 24.780380249023438, + "learning_rate": 1.2e-05, + "loss": 2.8427, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 22.699949264526367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.709, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 22.106008529663086, + "learning_rate": 2e-05, + "loss": 2.5854, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 22.497045516967773, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.5427, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 27.103275299072266, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.6599, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 21.081985473632812, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.5145, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 25.964981079101562, + "learning_rate": 1.979591836734694e-05, + "loss": 2.4247, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 25.353195190429688, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.5092, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 18.94191551208496, + "learning_rate": 1.969387755102041e-05, + "loss": 2.4335, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 23.60140037536621, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.544, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 24.298965454101562, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.4987, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.745506286621094, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.4985, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 22.54330062866211, + "learning_rate": 1.948979591836735e-05, + "loss": 2.6892, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 21.46229362487793, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.3998, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 20.54530143737793, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.4244, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 18.8839111328125, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.3911, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.924652099609375, + "learning_rate": 1.928571428571429e-05, + "loss": 2.3588, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.996627807617188, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.3727, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 18.584613800048828, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2974, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 14.309200286865234, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.4843, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.074164390563965, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.4043, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 13.610542297363281, + "learning_rate": 1.903061224489796e-05, + "loss": 2.3762, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 15.666613578796387, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.3249, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 14.475164413452148, + "learning_rate": 1.892857142857143e-05, + "loss": 2.3317, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 16.231687545776367, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.5064, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 16.8968563079834, + "learning_rate": 1.88265306122449e-05, + "loss": 2.3932, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 17.74305534362793, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.3329, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 16.41620445251465, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.3982, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 17.965959548950195, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.4227, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 19.92589569091797, + "learning_rate": 1.862244897959184e-05, + "loss": 2.5255, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 20.62932586669922, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.1816, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 18.360614776611328, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.2827, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 19.199546813964844, + "learning_rate": 1.8469387755102043e-05, + "loss": 2.1498, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 22.727521896362305, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.3811, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 19.80649757385254, + "learning_rate": 1.836734693877551e-05, + "loss": 2.2342, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.24563217163086, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.2287, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 25.384042739868164, + "learning_rate": 1.826530612244898e-05, + "loss": 2.1259, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 23.417089462280273, + "learning_rate": 1.8214285714285715e-05, + "loss": 2.0858, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 27.639497756958008, + "learning_rate": 1.816326530612245e-05, + "loss": 2.2243, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 27.390850067138672, + "learning_rate": 1.8112244897959187e-05, + "loss": 2.1314, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 27.956937789916992, + "learning_rate": 1.806122448979592e-05, + "loss": 2.1755, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 32.09632873535156, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.2365, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 33.84647750854492, + "learning_rate": 1.795918367346939e-05, + "loss": 2.1671, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 32.027130126953125, + "learning_rate": 1.7908163265306123e-05, + "loss": 2.09, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 35.423587799072266, + "learning_rate": 1.785714285714286e-05, + "loss": 2.2479, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 31.041240692138672, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9687, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 28.790103912353516, + "learning_rate": 1.7755102040816327e-05, + "loss": 2.1428, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.089313507080078, + "learning_rate": 1.7704081632653062e-05, + "loss": 2.0673, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 26.493867874145508, + "learning_rate": 1.7653061224489798e-05, + "loss": 2.0814, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 19.993173599243164, + "learning_rate": 1.760204081632653e-05, + "loss": 2.005, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 21.89765167236328, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.2262, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 23.22844123840332, + "learning_rate": 1.7500000000000002e-05, + "loss": 2.0208, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 15.864526748657227, + "learning_rate": 1.7448979591836738e-05, + "loss": 2.0153, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 21.451187133789062, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.1386, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 18.089811325073242, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.9517, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 24.029157638549805, + "learning_rate": 1.729591836734694e-05, + "loss": 1.9719, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 18.722776412963867, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0623, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 20.211933135986328, + "learning_rate": 1.719387755102041e-05, + "loss": 2.0081, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 17.61188507080078, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.8484, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 20.118955612182617, + "learning_rate": 1.7091836734693878e-05, + "loss": 2.0799, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 17.271841049194336, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.9832, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 19.521392822265625, + "learning_rate": 1.698979591836735e-05, + "loss": 1.9129, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 22.660900115966797, + "learning_rate": 1.6938775510204085e-05, + "loss": 2.118, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 17.332427978515625, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.9632, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 22.42765998840332, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.954, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 23.6208553314209, + "learning_rate": 1.678571428571429e-05, + "loss": 1.9917, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 19.78505516052246, + "learning_rate": 1.673469387755102e-05, + "loss": 1.7964, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 19.453041076660156, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.9587, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 24.731407165527344, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.9945, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 20.977611541748047, + "learning_rate": 1.6581632653061225e-05, + "loss": 2.0617, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 22.959585189819336, + "learning_rate": 1.653061224489796e-05, + "loss": 1.98, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 21.952653884887695, + "learning_rate": 1.6479591836734696e-05, + "loss": 2.1094, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 22.320383071899414, + "learning_rate": 1.642857142857143e-05, + "loss": 1.8418, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 24.375411987304688, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.8428, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 19.64323616027832, + "learning_rate": 1.63265306122449e-05, + "loss": 1.9194, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 22.459064483642578, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.6649, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 36.789764404296875, + "learning_rate": 1.6224489795918368e-05, + "loss": 2.0131, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 22.109119415283203, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.9603, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 19.196834564208984, + "learning_rate": 1.612244897959184e-05, + "loss": 2.0538, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 26.870800018310547, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.9168, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 35.190696716308594, + "learning_rate": 1.6020408163265308e-05, + "loss": 2.0149, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 19.963472366333008, + "learning_rate": 1.596938775510204e-05, + "loss": 1.7871, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 20.292407989501953, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.944, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 20.55329132080078, + "learning_rate": 1.586734693877551e-05, + "loss": 2.0175, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 17.27350616455078, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.9308, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 22.471134185791016, + "learning_rate": 1.576530612244898e-05, + "loss": 1.9221, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 25.098316192626953, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.9359, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 25.125213623046875, + "learning_rate": 1.566326530612245e-05, + "loss": 2.0087, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 20.038599014282227, + "learning_rate": 1.5612244897959187e-05, + "loss": 2.0939, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 19.016841888427734, + "learning_rate": 1.556122448979592e-05, + "loss": 2.0183, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 21.97820472717285, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.8239, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 25.578901290893555, + "learning_rate": 1.545918367346939e-05, + "loss": 1.9388, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 23.74614143371582, + "learning_rate": 1.5408163265306123e-05, + "loss": 2.0492, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 22.203304290771484, + "learning_rate": 1.535714285714286e-05, + "loss": 1.941, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 21.39324188232422, + "learning_rate": 1.530612244897959e-05, + "loss": 1.9042, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 18.99315643310547, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.88, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 24.22341537475586, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.8147, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.8966256380081177, + "eval_runtime": 6.9787, + "eval_samples_per_second": 101.022, + "eval_steps_per_second": 50.583, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 18.296152114868164, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.8605, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 26.404766082763672, + "learning_rate": 1.510204081632653e-05, + "loss": 2.1195, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 19.187122344970703, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.9284, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 20.79934310913086, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.725, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 23.833288192749023, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.9215, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 22.301727294921875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.8728, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 23.685596466064453, + "learning_rate": 1.4846938775510204e-05, + "loss": 2.0482, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 18.969186782836914, + "learning_rate": 1.479591836734694e-05, + "loss": 1.8724, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 23.994483947753906, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.9542, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 16.84621238708496, + "learning_rate": 1.469387755102041e-05, + "loss": 1.9703, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 23.411087036132812, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.9836, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 29.55487632751465, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.9124, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 32.28921127319336, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.8566, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 24.007558822631836, + "learning_rate": 1.448979591836735e-05, + "loss": 1.8296, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 26.753524780273438, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.7181, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 22.49270248413086, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.8741, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 28.006656646728516, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.9151, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 17.606775283813477, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.9654, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 29.94802474975586, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.8849, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 28.27743148803711, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.8006, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 19.11652183532715, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.8539, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 24.255807876586914, + "learning_rate": 1.4081632653061225e-05, + "loss": 2.0162, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 22.508352279663086, + "learning_rate": 1.403061224489796e-05, + "loss": 1.858, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 27.028772354125977, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.8175, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 22.697704315185547, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.9789, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 31.604068756103516, + "learning_rate": 1.3877551020408165e-05, + "loss": 2.0039, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 27.71053695678711, + "learning_rate": 1.38265306122449e-05, + "loss": 1.9867, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 17.37586784362793, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.6931, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 20.28536605834961, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.9199, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 29.4377384185791, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.9322, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 17.703046798706055, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.8842, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 30.14008140563965, + "learning_rate": 1.3571428571428574e-05, + "loss": 2.1228, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 29.657262802124023, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.8929, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 18.243854522705078, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.8811, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 33.22247314453125, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.9814, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 26.413856506347656, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.9329, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 20.56089210510254, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.8944, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 19.480737686157227, + "learning_rate": 1.326530612244898e-05, + "loss": 1.9221, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 22.788074493408203, + "learning_rate": 1.3214285714285716e-05, + "loss": 2.0445, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 20.722291946411133, + "learning_rate": 1.316326530612245e-05, + "loss": 1.9998, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 25.190189361572266, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.8076, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 23.203886032104492, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.7821, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 25.32374382019043, + "learning_rate": 1.3010204081632653e-05, + "loss": 2.0356, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 28.798864364624023, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.8202, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 24.93810272216797, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.9237, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 36.78353500366211, + "learning_rate": 1.2857142857142859e-05, + "loss": 2.0019, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 28.510663986206055, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.9268, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 38.19087219238281, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.9366, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 20.796728134155273, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.8731, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 23.036758422851562, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.8835, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 27.058195114135742, + "learning_rate": 1.260204081632653e-05, + "loss": 1.8013, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 25.390460968017578, + "learning_rate": 1.2551020408163267e-05, + "loss": 2.0623, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 27.993654251098633, + "learning_rate": 1.25e-05, + "loss": 1.6895, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 24.15807342529297, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.9799, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 24.369815826416016, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.9687, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 24.572988510131836, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.8607, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 20.491390228271484, + "learning_rate": 1.229591836734694e-05, + "loss": 2.0677, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 25.128101348876953, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.9233, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 18.843276977539062, + "learning_rate": 1.219387755102041e-05, + "loss": 1.781, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 24.99994659423828, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.962, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 20.679218292236328, + "learning_rate": 1.2091836734693878e-05, + "loss": 2.0055, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 26.00550651550293, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.9761, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 33.80900192260742, + "learning_rate": 1.1989795918367348e-05, + "loss": 2.0502, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 25.639009475708008, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.9088, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 17.48627471923828, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.9359, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 23.16074562072754, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.8647, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 25.39946174621582, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.8523, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 25.8050537109375, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.8403, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 20.019033432006836, + "learning_rate": 1.1683673469387755e-05, + "loss": 2.0023, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 26.194847106933594, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.9429, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 21.064212799072266, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.8302, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 21.876129150390625, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.8881, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 33.61103439331055, + "learning_rate": 1.1479591836734697e-05, + "loss": 2.0497, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 27.204744338989258, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.8431, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 21.605751037597656, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.9149, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 30.307472229003906, + "learning_rate": 1.1326530612244899e-05, + "loss": 2.0313, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 23.69244384765625, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.8676, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 25.619901657104492, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.7905, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 28.0296573638916, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.8567, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 36.4359130859375, + "learning_rate": 1.1122448979591838e-05, + "loss": 2.115, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 26.91726303100586, + "learning_rate": 1.1071428571428572e-05, + "loss": 2.0642, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 23.085880279541016, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.9109, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 27.870641708374023, + "learning_rate": 1.096938775510204e-05, + "loss": 1.8999, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 32.0672607421875, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.904, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 28.879365921020508, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.7159, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 27.592771530151367, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.8561, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 27.412763595581055, + "learning_rate": 1.076530612244898e-05, + "loss": 1.9282, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 30.12356185913086, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.8726, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 39.9027214050293, + "learning_rate": 1.066326530612245e-05, + "loss": 1.7551, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 30.483945846557617, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.7643, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 26.00415802001953, + "learning_rate": 1.0561224489795918e-05, + "loss": 2.0552, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 23.03282356262207, + "learning_rate": 1.0510204081632654e-05, + "loss": 2.0052, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 33.653221130371094, + "learning_rate": 1.045918367346939e-05, + "loss": 1.7367, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 39.59351348876953, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.9726, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.77714920043945, + "learning_rate": 1.0357142857142859e-05, + "loss": 2.0906, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 33.194549560546875, + "learning_rate": 1.0306122448979591e-05, + "loss": 2.0742, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 25.10793685913086, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.9204, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 40.048404693603516, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.7775, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 26.085933685302734, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.9459, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 18.375, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.9536, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.8626214265823364, + "eval_runtime": 6.6508, + "eval_samples_per_second": 106.002, + "eval_steps_per_second": 53.076, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 33.858341217041016, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.8191, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 22.895992279052734, + "learning_rate": 1e-05, + "loss": 2.0596, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 30.55072593688965, + "learning_rate": 9.948979591836737e-06, + "loss": 1.8904, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 26.542705535888672, + "learning_rate": 9.89795918367347e-06, + "loss": 1.9683, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 39.81034851074219, + "learning_rate": 9.846938775510205e-06, + "loss": 1.9726, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 22.0065860748291, + "learning_rate": 9.795918367346939e-06, + "loss": 2.0575, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 19.012041091918945, + "learning_rate": 9.744897959183674e-06, + "loss": 1.8431, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 39.699974060058594, + "learning_rate": 9.693877551020408e-06, + "loss": 1.8231, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 21.391319274902344, + "learning_rate": 9.642857142857144e-06, + "loss": 1.7939, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 25.8063907623291, + "learning_rate": 9.591836734693878e-06, + "loss": 2.0366, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 20.598569869995117, + "learning_rate": 9.540816326530612e-06, + "loss": 1.8323, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 29.391401290893555, + "learning_rate": 9.489795918367348e-06, + "loss": 2.0052, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 24.39499855041504, + "learning_rate": 9.438775510204082e-06, + "loss": 1.8461, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 24.16887092590332, + "learning_rate": 9.387755102040818e-06, + "loss": 1.9404, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 24.577871322631836, + "learning_rate": 9.336734693877552e-06, + "loss": 1.9202, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 26.117361068725586, + "learning_rate": 9.285714285714288e-06, + "loss": 1.921, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 22.586837768554688, + "learning_rate": 9.234693877551022e-06, + "loss": 1.9692, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 18.438722610473633, + "learning_rate": 9.183673469387756e-06, + "loss": 1.9496, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 22.94545555114746, + "learning_rate": 9.13265306122449e-06, + "loss": 1.991, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 28.664562225341797, + "learning_rate": 9.081632653061225e-06, + "loss": 1.8352, + "step": 220 + }, + { + "epoch": 0.5573770491803278, + "grad_norm": 25.63576316833496, + "learning_rate": 9.03061224489796e-06, + "loss": 1.9399, + "step": 221 + }, + { + "epoch": 0.5598991172761665, + "grad_norm": 21.650251388549805, + "learning_rate": 8.979591836734695e-06, + "loss": 1.9565, + "step": 222 + }, + { + "epoch": 0.562421185372005, + "grad_norm": 29.605735778808594, + "learning_rate": 8.92857142857143e-06, + "loss": 1.729, + "step": 223 + }, + { + "epoch": 0.5649432534678437, + "grad_norm": 23.98230743408203, + "learning_rate": 8.877551020408163e-06, + "loss": 1.9399, + "step": 224 + }, + { + "epoch": 0.5674653215636822, + "grad_norm": 20.37510108947754, + "learning_rate": 8.826530612244899e-06, + "loss": 1.7322, + "step": 225 + }, + { + "epoch": 0.5699873896595208, + "grad_norm": 25.876188278198242, + "learning_rate": 8.775510204081633e-06, + "loss": 1.9444, + "step": 226 + }, + { + "epoch": 0.5725094577553594, + "grad_norm": 32.07249069213867, + "learning_rate": 8.724489795918369e-06, + "loss": 1.9364, + "step": 227 + }, + { + "epoch": 0.575031525851198, + "grad_norm": 28.014524459838867, + "learning_rate": 8.673469387755103e-06, + "loss": 1.757, + "step": 228 + }, + { + "epoch": 0.5775535939470365, + "grad_norm": 30.82647132873535, + "learning_rate": 8.622448979591837e-06, + "loss": 1.9067, + "step": 229 + }, + { + "epoch": 0.5800756620428752, + "grad_norm": 30.651660919189453, + "learning_rate": 8.571428571428571e-06, + "loss": 1.9906, + "step": 230 + }, + { + "epoch": 0.5825977301387137, + "grad_norm": 25.239904403686523, + "learning_rate": 8.520408163265307e-06, + "loss": 1.8914, + "step": 231 + }, + { + "epoch": 0.5851197982345523, + "grad_norm": 21.33747673034668, + "learning_rate": 8.469387755102042e-06, + "loss": 1.9999, + "step": 232 + }, + { + "epoch": 0.587641866330391, + "grad_norm": 25.255064010620117, + "learning_rate": 8.418367346938776e-06, + "loss": 1.8941, + "step": 233 + }, + { + "epoch": 0.5901639344262295, + "grad_norm": 24.443973541259766, + "learning_rate": 8.36734693877551e-06, + "loss": 1.7679, + "step": 234 + }, + { + "epoch": 0.592686002522068, + "grad_norm": 25.473894119262695, + "learning_rate": 8.316326530612246e-06, + "loss": 1.7876, + "step": 235 + }, + { + "epoch": 0.5952080706179067, + "grad_norm": 26.28467559814453, + "learning_rate": 8.26530612244898e-06, + "loss": 1.6761, + "step": 236 + }, + { + "epoch": 0.5977301387137453, + "grad_norm": 24.488052368164062, + "learning_rate": 8.214285714285714e-06, + "loss": 2.0022, + "step": 237 + }, + { + "epoch": 0.6002522068095839, + "grad_norm": 30.074064254760742, + "learning_rate": 8.16326530612245e-06, + "loss": 1.7747, + "step": 238 + }, + { + "epoch": 0.6027742749054225, + "grad_norm": 23.73440170288086, + "learning_rate": 8.112244897959184e-06, + "loss": 1.8468, + "step": 239 + }, + { + "epoch": 0.605296343001261, + "grad_norm": 22.338869094848633, + "learning_rate": 8.06122448979592e-06, + "loss": 1.8611, + "step": 240 + }, + { + "epoch": 0.6078184110970997, + "grad_norm": 24.844266891479492, + "learning_rate": 8.010204081632654e-06, + "loss": 1.9285, + "step": 241 + }, + { + "epoch": 0.6103404791929382, + "grad_norm": 29.65668487548828, + "learning_rate": 7.959183673469388e-06, + "loss": 1.935, + "step": 242 + }, + { + "epoch": 0.6128625472887768, + "grad_norm": 26.01723289489746, + "learning_rate": 7.908163265306124e-06, + "loss": 1.7587, + "step": 243 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 27.04817771911621, + "learning_rate": 7.857142857142858e-06, + "loss": 1.8878, + "step": 244 + }, + { + "epoch": 0.617906683480454, + "grad_norm": 36.23786163330078, + "learning_rate": 7.806122448979593e-06, + "loss": 1.992, + "step": 245 + }, + { + "epoch": 0.6204287515762925, + "grad_norm": 19.283294677734375, + "learning_rate": 7.755102040816327e-06, + "loss": 1.8066, + "step": 246 + }, + { + "epoch": 0.6229508196721312, + "grad_norm": 24.24143409729004, + "learning_rate": 7.704081632653061e-06, + "loss": 1.8899, + "step": 247 + }, + { + "epoch": 0.6254728877679697, + "grad_norm": 25.59832763671875, + "learning_rate": 7.653061224489796e-06, + "loss": 1.9601, + "step": 248 + }, + { + "epoch": 0.6279949558638083, + "grad_norm": 27.195640563964844, + "learning_rate": 7.602040816326531e-06, + "loss": 1.9561, + "step": 249 + }, + { + "epoch": 0.6305170239596469, + "grad_norm": 27.854570388793945, + "learning_rate": 7.551020408163265e-06, + "loss": 1.8781, + "step": 250 + }, + { + "epoch": 0.6330390920554855, + "grad_norm": 25.715761184692383, + "learning_rate": 7.500000000000001e-06, + "loss": 1.8542, + "step": 251 + }, + { + "epoch": 0.6355611601513241, + "grad_norm": 22.562984466552734, + "learning_rate": 7.448979591836736e-06, + "loss": 1.7681, + "step": 252 + }, + { + "epoch": 0.6380832282471627, + "grad_norm": 20.540348052978516, + "learning_rate": 7.39795918367347e-06, + "loss": 1.9617, + "step": 253 + }, + { + "epoch": 0.6406052963430012, + "grad_norm": 24.610937118530273, + "learning_rate": 7.346938775510205e-06, + "loss": 1.9694, + "step": 254 + }, + { + "epoch": 0.6431273644388399, + "grad_norm": 27.93538475036621, + "learning_rate": 7.295918367346939e-06, + "loss": 1.8858, + "step": 255 + }, + { + "epoch": 0.6456494325346784, + "grad_norm": 31.466445922851562, + "learning_rate": 7.244897959183675e-06, + "loss": 1.8252, + "step": 256 + }, + { + "epoch": 0.648171500630517, + "grad_norm": 26.276226043701172, + "learning_rate": 7.193877551020409e-06, + "loss": 1.8865, + "step": 257 + }, + { + "epoch": 0.6506935687263556, + "grad_norm": 22.52095603942871, + "learning_rate": 7.1428571428571436e-06, + "loss": 1.7649, + "step": 258 + }, + { + "epoch": 0.6532156368221942, + "grad_norm": 20.15144157409668, + "learning_rate": 7.091836734693878e-06, + "loss": 2.0158, + "step": 259 + }, + { + "epoch": 0.6557377049180327, + "grad_norm": 26.405349731445312, + "learning_rate": 7.0408163265306125e-06, + "loss": 1.8932, + "step": 260 + }, + { + "epoch": 0.6582597730138714, + "grad_norm": 32.94384765625, + "learning_rate": 6.989795918367348e-06, + "loss": 1.7795, + "step": 261 + }, + { + "epoch": 0.6607818411097099, + "grad_norm": 23.109092712402344, + "learning_rate": 6.938775510204082e-06, + "loss": 1.8383, + "step": 262 + }, + { + "epoch": 0.6633039092055486, + "grad_norm": 21.75737190246582, + "learning_rate": 6.887755102040817e-06, + "loss": 1.8727, + "step": 263 + }, + { + "epoch": 0.6658259773013872, + "grad_norm": 22.96916389465332, + "learning_rate": 6.836734693877551e-06, + "loss": 1.8544, + "step": 264 + }, + { + "epoch": 0.6683480453972257, + "grad_norm": 25.62445640563965, + "learning_rate": 6.785714285714287e-06, + "loss": 1.7503, + "step": 265 + }, + { + "epoch": 0.6708701134930644, + "grad_norm": 25.430530548095703, + "learning_rate": 6.734693877551021e-06, + "loss": 1.7938, + "step": 266 + }, + { + "epoch": 0.6733921815889029, + "grad_norm": 26.462881088256836, + "learning_rate": 6.683673469387756e-06, + "loss": 1.8284, + "step": 267 + }, + { + "epoch": 0.6759142496847415, + "grad_norm": 31.45004653930664, + "learning_rate": 6.63265306122449e-06, + "loss": 2.0328, + "step": 268 + }, + { + "epoch": 0.6784363177805801, + "grad_norm": 30.525737762451172, + "learning_rate": 6.581632653061225e-06, + "loss": 1.8192, + "step": 269 + }, + { + "epoch": 0.6809583858764187, + "grad_norm": 25.705707550048828, + "learning_rate": 6.530612244897959e-06, + "loss": 1.8533, + "step": 270 + }, + { + "epoch": 0.6834804539722572, + "grad_norm": 39.90187454223633, + "learning_rate": 6.4795918367346946e-06, + "loss": 1.9483, + "step": 271 + }, + { + "epoch": 0.6860025220680959, + "grad_norm": 28.0180721282959, + "learning_rate": 6.4285714285714295e-06, + "loss": 1.8132, + "step": 272 + }, + { + "epoch": 0.6885245901639344, + "grad_norm": 34.821372985839844, + "learning_rate": 6.3775510204081635e-06, + "loss": 1.9599, + "step": 273 + }, + { + "epoch": 0.691046658259773, + "grad_norm": 24.018394470214844, + "learning_rate": 6.326530612244899e-06, + "loss": 1.9248, + "step": 274 + }, + { + "epoch": 0.6935687263556116, + "grad_norm": 24.074344635009766, + "learning_rate": 6.275510204081633e-06, + "loss": 2.0148, + "step": 275 + }, + { + "epoch": 0.6960907944514502, + "grad_norm": 31.1939754486084, + "learning_rate": 6.224489795918368e-06, + "loss": 1.8959, + "step": 276 + }, + { + "epoch": 0.6986128625472888, + "grad_norm": 25.481502532958984, + "learning_rate": 6.173469387755102e-06, + "loss": 1.9832, + "step": 277 + }, + { + "epoch": 0.7011349306431274, + "grad_norm": 29.6664981842041, + "learning_rate": 6.122448979591837e-06, + "loss": 1.9222, + "step": 278 + }, + { + "epoch": 0.7036569987389659, + "grad_norm": 26.30698585510254, + "learning_rate": 6.071428571428571e-06, + "loss": 1.9897, + "step": 279 + }, + { + "epoch": 0.7061790668348046, + "grad_norm": 31.827558517456055, + "learning_rate": 6.020408163265307e-06, + "loss": 1.8615, + "step": 280 + }, + { + "epoch": 0.7087011349306431, + "grad_norm": 24.80223846435547, + "learning_rate": 5.969387755102042e-06, + "loss": 1.9579, + "step": 281 + }, + { + "epoch": 0.7112232030264817, + "grad_norm": 36.134700775146484, + "learning_rate": 5.918367346938776e-06, + "loss": 1.723, + "step": 282 + }, + { + "epoch": 0.7137452711223203, + "grad_norm": 30.388233184814453, + "learning_rate": 5.867346938775511e-06, + "loss": 1.9736, + "step": 283 + }, + { + "epoch": 0.7162673392181589, + "grad_norm": 32.231563568115234, + "learning_rate": 5.816326530612246e-06, + "loss": 1.9228, + "step": 284 + }, + { + "epoch": 0.7187894073139974, + "grad_norm": 38.05869674682617, + "learning_rate": 5.7653061224489805e-06, + "loss": 1.9159, + "step": 285 + }, + { + "epoch": 0.7213114754098361, + "grad_norm": 27.256147384643555, + "learning_rate": 5.7142857142857145e-06, + "loss": 1.8072, + "step": 286 + }, + { + "epoch": 0.7238335435056746, + "grad_norm": 25.67181396484375, + "learning_rate": 5.663265306122449e-06, + "loss": 1.8633, + "step": 287 + }, + { + "epoch": 0.7263556116015133, + "grad_norm": 31.8681697845459, + "learning_rate": 5.6122448979591834e-06, + "loss": 1.9822, + "step": 288 + }, + { + "epoch": 0.7288776796973518, + "grad_norm": 32.85325241088867, + "learning_rate": 5.561224489795919e-06, + "loss": 1.8166, + "step": 289 + }, + { + "epoch": 0.7313997477931904, + "grad_norm": 35.64312744140625, + "learning_rate": 5.510204081632653e-06, + "loss": 1.7166, + "step": 290 + }, + { + "epoch": 0.733921815889029, + "grad_norm": 24.276235580444336, + "learning_rate": 5.459183673469388e-06, + "loss": 1.7593, + "step": 291 + }, + { + "epoch": 0.7364438839848676, + "grad_norm": 29.371950149536133, + "learning_rate": 5.408163265306123e-06, + "loss": 1.8124, + "step": 292 + }, + { + "epoch": 0.7389659520807061, + "grad_norm": 23.76220703125, + "learning_rate": 5.357142857142857e-06, + "loss": 1.7775, + "step": 293 + }, + { + "epoch": 0.7414880201765448, + "grad_norm": 37.103050231933594, + "learning_rate": 5.306122448979593e-06, + "loss": 1.9253, + "step": 294 + }, + { + "epoch": 0.7440100882723834, + "grad_norm": 20.0811767578125, + "learning_rate": 5.255102040816327e-06, + "loss": 1.8711, + "step": 295 + }, + { + "epoch": 0.7465321563682219, + "grad_norm": 35.33123016357422, + "learning_rate": 5.204081632653062e-06, + "loss": 1.8764, + "step": 296 + }, + { + "epoch": 0.7490542244640606, + "grad_norm": 31.880672454833984, + "learning_rate": 5.153061224489796e-06, + "loss": 1.8929, + "step": 297 + }, + { + "epoch": 0.7515762925598991, + "grad_norm": 21.682334899902344, + "learning_rate": 5.1020408163265315e-06, + "loss": 2.0377, + "step": 298 + }, + { + "epoch": 0.7540983606557377, + "grad_norm": 34.68608474731445, + "learning_rate": 5.0510204081632655e-06, + "loss": 1.9341, + "step": 299 + }, + { + "epoch": 0.7566204287515763, + "grad_norm": 25.59632110595703, + "learning_rate": 5e-06, + "loss": 1.8264, + "step": 300 + }, + { + "epoch": 0.7566204287515763, + "eval_loss": 1.8502724170684814, + "eval_runtime": 6.6208, + "eval_samples_per_second": 106.483, + "eval_steps_per_second": 53.317, + "step": 300 + }, + { + "epoch": 0.7591424968474149, + "grad_norm": 33.780616760253906, + "learning_rate": 4.948979591836735e-06, + "loss": 1.8315, + "step": 301 + }, + { + "epoch": 0.7616645649432535, + "grad_norm": 23.005069732666016, + "learning_rate": 4.897959183673469e-06, + "loss": 1.8434, + "step": 302 + }, + { + "epoch": 0.7641866330390921, + "grad_norm": 27.338787078857422, + "learning_rate": 4.846938775510204e-06, + "loss": 1.9102, + "step": 303 + }, + { + "epoch": 0.7667087011349306, + "grad_norm": 26.87493133544922, + "learning_rate": 4.795918367346939e-06, + "loss": 1.8408, + "step": 304 + }, + { + "epoch": 0.7692307692307693, + "grad_norm": 33.59117126464844, + "learning_rate": 4.744897959183674e-06, + "loss": 1.8633, + "step": 305 + }, + { + "epoch": 0.7717528373266078, + "grad_norm": 38.98092269897461, + "learning_rate": 4.693877551020409e-06, + "loss": 1.8187, + "step": 306 + }, + { + "epoch": 0.7742749054224464, + "grad_norm": 28.7203369140625, + "learning_rate": 4.642857142857144e-06, + "loss": 1.8425, + "step": 307 + }, + { + "epoch": 0.776796973518285, + "grad_norm": 30.91414451599121, + "learning_rate": 4.591836734693878e-06, + "loss": 1.8526, + "step": 308 + }, + { + "epoch": 0.7793190416141236, + "grad_norm": 29.04154396057129, + "learning_rate": 4.540816326530613e-06, + "loss": 1.8913, + "step": 309 + }, + { + "epoch": 0.7818411097099621, + "grad_norm": 29.638099670410156, + "learning_rate": 4.489795918367348e-06, + "loss": 1.8736, + "step": 310 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4524488042102784.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-310/training_args.bin b/checkpoints/checkpoint-310/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..866f8d7e39db21daf30a11de7c14a3ccb8f2e9aa --- /dev/null +++ b/checkpoints/checkpoint-310/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:591380c2089627b1aa728bfd52abdb50afeaafc7a7f48353dede4e443fc370d0 +size 5969 diff --git a/checkpoints/checkpoint-320/README.md b/checkpoints/checkpoint-320/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b974a935220f493e52a3da346206a652479b4735 --- /dev/null +++ b/checkpoints/checkpoint-320/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.2-1B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-320/adapter_config.json b/checkpoints/checkpoint-320/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a0d31afa2216e2d6d2237fc15eef0c8c03ca674 --- /dev/null +++ b/checkpoints/checkpoint-320/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.2-1B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-320/adapter_model.safetensors b/checkpoints/checkpoint-320/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4190be6733a609c5fdd700edfd4a8f977fcc42f9 --- /dev/null +++ b/checkpoints/checkpoint-320/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e9a65f1e661aa15f156d32b6e97a0d64aa2bdbf8aa1b828f5de5ac715cdead4 +size 41248 diff --git a/checkpoints/checkpoint-320/chat_template.jinja b/checkpoints/checkpoint-320/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..1bad6a0f648dccdbec523ca79ba90fbcfc806af0 --- /dev/null +++ b/checkpoints/checkpoint-320/chat_template.jinja @@ -0,0 +1,93 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- if strftime_now is defined %} + {%- set date_string = strftime_now("%d %b %Y") %} + {%- else %} + {%- set date_string = "26 Jul 2024" %} + {%- endif %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {{- "<|eot_id|>" }} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-320/optimizer.pt b/checkpoints/checkpoint-320/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a5c1f22a4a2bb0a6dcf01e2bdd38feb6f73ac15b --- /dev/null +++ b/checkpoints/checkpoint-320/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e54bf4c5e909a86a00621a22b0e87664662fe6fbcc4953d43bb480b9dfddb478 +size 38953 diff --git a/checkpoints/checkpoint-320/rng_state.pth b/checkpoints/checkpoint-320/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c9d202f3cabb995444284e316dca48902b67f89a --- /dev/null +++ b/checkpoints/checkpoint-320/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f646d2c44059721e44eb93fd2af0841401f183d6ff9b2e067649f1f58cce0f09 +size 14581 diff --git a/checkpoints/checkpoint-320/scheduler.pt b/checkpoints/checkpoint-320/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a4f5b30908ceab72809ac4e360212bb7955afe06 --- /dev/null +++ b/checkpoints/checkpoint-320/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebddd736ccd70376fdd6eb3d0feea2b93a82052a161e00dbb6eb67aa0e6682c5 +size 1465 diff --git a/checkpoints/checkpoint-320/special_tokens_map.json b/checkpoints/checkpoint-320/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-320/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-320/tokenizer.json b/checkpoints/checkpoint-320/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-320/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-320/tokenizer_config.json b/checkpoints/checkpoint-320/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-320/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-320/trainer_state.json b/checkpoints/checkpoint-320/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..210087c02a55026565b698d745417fb80a05e48a --- /dev/null +++ b/checkpoints/checkpoint-320/trainer_state.json @@ -0,0 +1,2298 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.807061790668348, + "eval_steps": 100, + "global_step": 320, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 25.350475311279297, + "learning_rate": 0.0, + "loss": 2.5568, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 24.538068771362305, + "learning_rate": 4.000000000000001e-06, + "loss": 2.7748, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 23.780784606933594, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5911, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 24.780380249023438, + "learning_rate": 1.2e-05, + "loss": 2.8427, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 22.699949264526367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.709, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 22.106008529663086, + "learning_rate": 2e-05, + "loss": 2.5854, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 22.497045516967773, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.5427, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 27.103275299072266, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.6599, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 21.081985473632812, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.5145, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 25.964981079101562, + "learning_rate": 1.979591836734694e-05, + "loss": 2.4247, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 25.353195190429688, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.5092, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 18.94191551208496, + "learning_rate": 1.969387755102041e-05, + "loss": 2.4335, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 23.60140037536621, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.544, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 24.298965454101562, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.4987, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.745506286621094, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.4985, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 22.54330062866211, + "learning_rate": 1.948979591836735e-05, + "loss": 2.6892, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 21.46229362487793, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.3998, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 20.54530143737793, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.4244, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 18.8839111328125, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.3911, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.924652099609375, + "learning_rate": 1.928571428571429e-05, + "loss": 2.3588, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.996627807617188, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.3727, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 18.584613800048828, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2974, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 14.309200286865234, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.4843, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.074164390563965, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.4043, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 13.610542297363281, + "learning_rate": 1.903061224489796e-05, + "loss": 2.3762, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 15.666613578796387, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.3249, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 14.475164413452148, + "learning_rate": 1.892857142857143e-05, + "loss": 2.3317, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 16.231687545776367, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.5064, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 16.8968563079834, + "learning_rate": 1.88265306122449e-05, + "loss": 2.3932, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 17.74305534362793, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.3329, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 16.41620445251465, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.3982, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 17.965959548950195, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.4227, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 19.92589569091797, + "learning_rate": 1.862244897959184e-05, + "loss": 2.5255, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 20.62932586669922, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.1816, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 18.360614776611328, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.2827, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 19.199546813964844, + "learning_rate": 1.8469387755102043e-05, + "loss": 2.1498, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 22.727521896362305, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.3811, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 19.80649757385254, + "learning_rate": 1.836734693877551e-05, + "loss": 2.2342, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.24563217163086, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.2287, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 25.384042739868164, + "learning_rate": 1.826530612244898e-05, + "loss": 2.1259, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 23.417089462280273, + "learning_rate": 1.8214285714285715e-05, + "loss": 2.0858, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 27.639497756958008, + "learning_rate": 1.816326530612245e-05, + "loss": 2.2243, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 27.390850067138672, + "learning_rate": 1.8112244897959187e-05, + "loss": 2.1314, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 27.956937789916992, + "learning_rate": 1.806122448979592e-05, + "loss": 2.1755, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 32.09632873535156, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.2365, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 33.84647750854492, + "learning_rate": 1.795918367346939e-05, + "loss": 2.1671, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 32.027130126953125, + "learning_rate": 1.7908163265306123e-05, + "loss": 2.09, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 35.423587799072266, + "learning_rate": 1.785714285714286e-05, + "loss": 2.2479, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 31.041240692138672, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9687, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 28.790103912353516, + "learning_rate": 1.7755102040816327e-05, + "loss": 2.1428, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.089313507080078, + "learning_rate": 1.7704081632653062e-05, + "loss": 2.0673, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 26.493867874145508, + "learning_rate": 1.7653061224489798e-05, + "loss": 2.0814, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 19.993173599243164, + "learning_rate": 1.760204081632653e-05, + "loss": 2.005, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 21.89765167236328, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.2262, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 23.22844123840332, + "learning_rate": 1.7500000000000002e-05, + "loss": 2.0208, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 15.864526748657227, + "learning_rate": 1.7448979591836738e-05, + "loss": 2.0153, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 21.451187133789062, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.1386, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 18.089811325073242, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.9517, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 24.029157638549805, + "learning_rate": 1.729591836734694e-05, + "loss": 1.9719, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 18.722776412963867, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0623, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 20.211933135986328, + "learning_rate": 1.719387755102041e-05, + "loss": 2.0081, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 17.61188507080078, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.8484, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 20.118955612182617, + "learning_rate": 1.7091836734693878e-05, + "loss": 2.0799, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 17.271841049194336, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.9832, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 19.521392822265625, + "learning_rate": 1.698979591836735e-05, + "loss": 1.9129, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 22.660900115966797, + "learning_rate": 1.6938775510204085e-05, + "loss": 2.118, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 17.332427978515625, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.9632, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 22.42765998840332, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.954, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 23.6208553314209, + "learning_rate": 1.678571428571429e-05, + "loss": 1.9917, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 19.78505516052246, + "learning_rate": 1.673469387755102e-05, + "loss": 1.7964, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 19.453041076660156, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.9587, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 24.731407165527344, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.9945, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 20.977611541748047, + "learning_rate": 1.6581632653061225e-05, + "loss": 2.0617, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 22.959585189819336, + "learning_rate": 1.653061224489796e-05, + "loss": 1.98, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 21.952653884887695, + "learning_rate": 1.6479591836734696e-05, + "loss": 2.1094, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 22.320383071899414, + "learning_rate": 1.642857142857143e-05, + "loss": 1.8418, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 24.375411987304688, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.8428, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 19.64323616027832, + "learning_rate": 1.63265306122449e-05, + "loss": 1.9194, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 22.459064483642578, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.6649, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 36.789764404296875, + "learning_rate": 1.6224489795918368e-05, + "loss": 2.0131, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 22.109119415283203, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.9603, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 19.196834564208984, + "learning_rate": 1.612244897959184e-05, + "loss": 2.0538, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 26.870800018310547, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.9168, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 35.190696716308594, + "learning_rate": 1.6020408163265308e-05, + "loss": 2.0149, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 19.963472366333008, + "learning_rate": 1.596938775510204e-05, + "loss": 1.7871, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 20.292407989501953, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.944, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 20.55329132080078, + "learning_rate": 1.586734693877551e-05, + "loss": 2.0175, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 17.27350616455078, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.9308, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 22.471134185791016, + "learning_rate": 1.576530612244898e-05, + "loss": 1.9221, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 25.098316192626953, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.9359, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 25.125213623046875, + "learning_rate": 1.566326530612245e-05, + "loss": 2.0087, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 20.038599014282227, + "learning_rate": 1.5612244897959187e-05, + "loss": 2.0939, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 19.016841888427734, + "learning_rate": 1.556122448979592e-05, + "loss": 2.0183, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 21.97820472717285, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.8239, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 25.578901290893555, + "learning_rate": 1.545918367346939e-05, + "loss": 1.9388, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 23.74614143371582, + "learning_rate": 1.5408163265306123e-05, + "loss": 2.0492, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 22.203304290771484, + "learning_rate": 1.535714285714286e-05, + "loss": 1.941, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 21.39324188232422, + "learning_rate": 1.530612244897959e-05, + "loss": 1.9042, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 18.99315643310547, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.88, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 24.22341537475586, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.8147, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.8966256380081177, + "eval_runtime": 6.9787, + "eval_samples_per_second": 101.022, + "eval_steps_per_second": 50.583, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 18.296152114868164, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.8605, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 26.404766082763672, + "learning_rate": 1.510204081632653e-05, + "loss": 2.1195, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 19.187122344970703, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.9284, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 20.79934310913086, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.725, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 23.833288192749023, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.9215, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 22.301727294921875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.8728, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 23.685596466064453, + "learning_rate": 1.4846938775510204e-05, + "loss": 2.0482, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 18.969186782836914, + "learning_rate": 1.479591836734694e-05, + "loss": 1.8724, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 23.994483947753906, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.9542, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 16.84621238708496, + "learning_rate": 1.469387755102041e-05, + "loss": 1.9703, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 23.411087036132812, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.9836, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 29.55487632751465, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.9124, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 32.28921127319336, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.8566, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 24.007558822631836, + "learning_rate": 1.448979591836735e-05, + "loss": 1.8296, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 26.753524780273438, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.7181, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 22.49270248413086, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.8741, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 28.006656646728516, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.9151, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 17.606775283813477, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.9654, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 29.94802474975586, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.8849, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 28.27743148803711, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.8006, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 19.11652183532715, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.8539, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 24.255807876586914, + "learning_rate": 1.4081632653061225e-05, + "loss": 2.0162, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 22.508352279663086, + "learning_rate": 1.403061224489796e-05, + "loss": 1.858, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 27.028772354125977, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.8175, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 22.697704315185547, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.9789, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 31.604068756103516, + "learning_rate": 1.3877551020408165e-05, + "loss": 2.0039, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 27.71053695678711, + "learning_rate": 1.38265306122449e-05, + "loss": 1.9867, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 17.37586784362793, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.6931, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 20.28536605834961, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.9199, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 29.4377384185791, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.9322, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 17.703046798706055, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.8842, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 30.14008140563965, + "learning_rate": 1.3571428571428574e-05, + "loss": 2.1228, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 29.657262802124023, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.8929, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 18.243854522705078, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.8811, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 33.22247314453125, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.9814, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 26.413856506347656, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.9329, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 20.56089210510254, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.8944, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 19.480737686157227, + "learning_rate": 1.326530612244898e-05, + "loss": 1.9221, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 22.788074493408203, + "learning_rate": 1.3214285714285716e-05, + "loss": 2.0445, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 20.722291946411133, + "learning_rate": 1.316326530612245e-05, + "loss": 1.9998, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 25.190189361572266, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.8076, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 23.203886032104492, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.7821, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 25.32374382019043, + "learning_rate": 1.3010204081632653e-05, + "loss": 2.0356, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 28.798864364624023, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.8202, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 24.93810272216797, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.9237, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 36.78353500366211, + "learning_rate": 1.2857142857142859e-05, + "loss": 2.0019, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 28.510663986206055, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.9268, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 38.19087219238281, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.9366, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 20.796728134155273, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.8731, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 23.036758422851562, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.8835, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 27.058195114135742, + "learning_rate": 1.260204081632653e-05, + "loss": 1.8013, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 25.390460968017578, + "learning_rate": 1.2551020408163267e-05, + "loss": 2.0623, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 27.993654251098633, + "learning_rate": 1.25e-05, + "loss": 1.6895, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 24.15807342529297, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.9799, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 24.369815826416016, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.9687, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 24.572988510131836, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.8607, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 20.491390228271484, + "learning_rate": 1.229591836734694e-05, + "loss": 2.0677, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 25.128101348876953, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.9233, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 18.843276977539062, + "learning_rate": 1.219387755102041e-05, + "loss": 1.781, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 24.99994659423828, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.962, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 20.679218292236328, + "learning_rate": 1.2091836734693878e-05, + "loss": 2.0055, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 26.00550651550293, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.9761, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 33.80900192260742, + "learning_rate": 1.1989795918367348e-05, + "loss": 2.0502, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 25.639009475708008, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.9088, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 17.48627471923828, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.9359, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 23.16074562072754, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.8647, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 25.39946174621582, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.8523, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 25.8050537109375, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.8403, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 20.019033432006836, + "learning_rate": 1.1683673469387755e-05, + "loss": 2.0023, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 26.194847106933594, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.9429, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 21.064212799072266, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.8302, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 21.876129150390625, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.8881, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 33.61103439331055, + "learning_rate": 1.1479591836734697e-05, + "loss": 2.0497, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 27.204744338989258, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.8431, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 21.605751037597656, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.9149, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 30.307472229003906, + "learning_rate": 1.1326530612244899e-05, + "loss": 2.0313, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 23.69244384765625, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.8676, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 25.619901657104492, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.7905, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 28.0296573638916, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.8567, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 36.4359130859375, + "learning_rate": 1.1122448979591838e-05, + "loss": 2.115, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 26.91726303100586, + "learning_rate": 1.1071428571428572e-05, + "loss": 2.0642, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 23.085880279541016, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.9109, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 27.870641708374023, + "learning_rate": 1.096938775510204e-05, + "loss": 1.8999, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 32.0672607421875, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.904, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 28.879365921020508, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.7159, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 27.592771530151367, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.8561, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 27.412763595581055, + "learning_rate": 1.076530612244898e-05, + "loss": 1.9282, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 30.12356185913086, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.8726, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 39.9027214050293, + "learning_rate": 1.066326530612245e-05, + "loss": 1.7551, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 30.483945846557617, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.7643, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 26.00415802001953, + "learning_rate": 1.0561224489795918e-05, + "loss": 2.0552, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 23.03282356262207, + "learning_rate": 1.0510204081632654e-05, + "loss": 2.0052, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 33.653221130371094, + "learning_rate": 1.045918367346939e-05, + "loss": 1.7367, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 39.59351348876953, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.9726, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.77714920043945, + "learning_rate": 1.0357142857142859e-05, + "loss": 2.0906, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 33.194549560546875, + "learning_rate": 1.0306122448979591e-05, + "loss": 2.0742, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 25.10793685913086, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.9204, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 40.048404693603516, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.7775, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 26.085933685302734, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.9459, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 18.375, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.9536, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.8626214265823364, + "eval_runtime": 6.6508, + "eval_samples_per_second": 106.002, + "eval_steps_per_second": 53.076, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 33.858341217041016, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.8191, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 22.895992279052734, + "learning_rate": 1e-05, + "loss": 2.0596, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 30.55072593688965, + "learning_rate": 9.948979591836737e-06, + "loss": 1.8904, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 26.542705535888672, + "learning_rate": 9.89795918367347e-06, + "loss": 1.9683, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 39.81034851074219, + "learning_rate": 9.846938775510205e-06, + "loss": 1.9726, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 22.0065860748291, + "learning_rate": 9.795918367346939e-06, + "loss": 2.0575, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 19.012041091918945, + "learning_rate": 9.744897959183674e-06, + "loss": 1.8431, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 39.699974060058594, + "learning_rate": 9.693877551020408e-06, + "loss": 1.8231, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 21.391319274902344, + "learning_rate": 9.642857142857144e-06, + "loss": 1.7939, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 25.8063907623291, + "learning_rate": 9.591836734693878e-06, + "loss": 2.0366, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 20.598569869995117, + "learning_rate": 9.540816326530612e-06, + "loss": 1.8323, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 29.391401290893555, + "learning_rate": 9.489795918367348e-06, + "loss": 2.0052, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 24.39499855041504, + "learning_rate": 9.438775510204082e-06, + "loss": 1.8461, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 24.16887092590332, + "learning_rate": 9.387755102040818e-06, + "loss": 1.9404, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 24.577871322631836, + "learning_rate": 9.336734693877552e-06, + "loss": 1.9202, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 26.117361068725586, + "learning_rate": 9.285714285714288e-06, + "loss": 1.921, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 22.586837768554688, + "learning_rate": 9.234693877551022e-06, + "loss": 1.9692, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 18.438722610473633, + "learning_rate": 9.183673469387756e-06, + "loss": 1.9496, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 22.94545555114746, + "learning_rate": 9.13265306122449e-06, + "loss": 1.991, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 28.664562225341797, + "learning_rate": 9.081632653061225e-06, + "loss": 1.8352, + "step": 220 + }, + { + "epoch": 0.5573770491803278, + "grad_norm": 25.63576316833496, + "learning_rate": 9.03061224489796e-06, + "loss": 1.9399, + "step": 221 + }, + { + "epoch": 0.5598991172761665, + "grad_norm": 21.650251388549805, + "learning_rate": 8.979591836734695e-06, + "loss": 1.9565, + "step": 222 + }, + { + "epoch": 0.562421185372005, + "grad_norm": 29.605735778808594, + "learning_rate": 8.92857142857143e-06, + "loss": 1.729, + "step": 223 + }, + { + "epoch": 0.5649432534678437, + "grad_norm": 23.98230743408203, + "learning_rate": 8.877551020408163e-06, + "loss": 1.9399, + "step": 224 + }, + { + "epoch": 0.5674653215636822, + "grad_norm": 20.37510108947754, + "learning_rate": 8.826530612244899e-06, + "loss": 1.7322, + "step": 225 + }, + { + "epoch": 0.5699873896595208, + "grad_norm": 25.876188278198242, + "learning_rate": 8.775510204081633e-06, + "loss": 1.9444, + "step": 226 + }, + { + "epoch": 0.5725094577553594, + "grad_norm": 32.07249069213867, + "learning_rate": 8.724489795918369e-06, + "loss": 1.9364, + "step": 227 + }, + { + "epoch": 0.575031525851198, + "grad_norm": 28.014524459838867, + "learning_rate": 8.673469387755103e-06, + "loss": 1.757, + "step": 228 + }, + { + "epoch": 0.5775535939470365, + "grad_norm": 30.82647132873535, + "learning_rate": 8.622448979591837e-06, + "loss": 1.9067, + "step": 229 + }, + { + "epoch": 0.5800756620428752, + "grad_norm": 30.651660919189453, + "learning_rate": 8.571428571428571e-06, + "loss": 1.9906, + "step": 230 + }, + { + "epoch": 0.5825977301387137, + "grad_norm": 25.239904403686523, + "learning_rate": 8.520408163265307e-06, + "loss": 1.8914, + "step": 231 + }, + { + "epoch": 0.5851197982345523, + "grad_norm": 21.33747673034668, + "learning_rate": 8.469387755102042e-06, + "loss": 1.9999, + "step": 232 + }, + { + "epoch": 0.587641866330391, + "grad_norm": 25.255064010620117, + "learning_rate": 8.418367346938776e-06, + "loss": 1.8941, + "step": 233 + }, + { + "epoch": 0.5901639344262295, + "grad_norm": 24.443973541259766, + "learning_rate": 8.36734693877551e-06, + "loss": 1.7679, + "step": 234 + }, + { + "epoch": 0.592686002522068, + "grad_norm": 25.473894119262695, + "learning_rate": 8.316326530612246e-06, + "loss": 1.7876, + "step": 235 + }, + { + "epoch": 0.5952080706179067, + "grad_norm": 26.28467559814453, + "learning_rate": 8.26530612244898e-06, + "loss": 1.6761, + "step": 236 + }, + { + "epoch": 0.5977301387137453, + "grad_norm": 24.488052368164062, + "learning_rate": 8.214285714285714e-06, + "loss": 2.0022, + "step": 237 + }, + { + "epoch": 0.6002522068095839, + "grad_norm": 30.074064254760742, + "learning_rate": 8.16326530612245e-06, + "loss": 1.7747, + "step": 238 + }, + { + "epoch": 0.6027742749054225, + "grad_norm": 23.73440170288086, + "learning_rate": 8.112244897959184e-06, + "loss": 1.8468, + "step": 239 + }, + { + "epoch": 0.605296343001261, + "grad_norm": 22.338869094848633, + "learning_rate": 8.06122448979592e-06, + "loss": 1.8611, + "step": 240 + }, + { + "epoch": 0.6078184110970997, + "grad_norm": 24.844266891479492, + "learning_rate": 8.010204081632654e-06, + "loss": 1.9285, + "step": 241 + }, + { + "epoch": 0.6103404791929382, + "grad_norm": 29.65668487548828, + "learning_rate": 7.959183673469388e-06, + "loss": 1.935, + "step": 242 + }, + { + "epoch": 0.6128625472887768, + "grad_norm": 26.01723289489746, + "learning_rate": 7.908163265306124e-06, + "loss": 1.7587, + "step": 243 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 27.04817771911621, + "learning_rate": 7.857142857142858e-06, + "loss": 1.8878, + "step": 244 + }, + { + "epoch": 0.617906683480454, + "grad_norm": 36.23786163330078, + "learning_rate": 7.806122448979593e-06, + "loss": 1.992, + "step": 245 + }, + { + "epoch": 0.6204287515762925, + "grad_norm": 19.283294677734375, + "learning_rate": 7.755102040816327e-06, + "loss": 1.8066, + "step": 246 + }, + { + "epoch": 0.6229508196721312, + "grad_norm": 24.24143409729004, + "learning_rate": 7.704081632653061e-06, + "loss": 1.8899, + "step": 247 + }, + { + "epoch": 0.6254728877679697, + "grad_norm": 25.59832763671875, + "learning_rate": 7.653061224489796e-06, + "loss": 1.9601, + "step": 248 + }, + { + "epoch": 0.6279949558638083, + "grad_norm": 27.195640563964844, + "learning_rate": 7.602040816326531e-06, + "loss": 1.9561, + "step": 249 + }, + { + "epoch": 0.6305170239596469, + "grad_norm": 27.854570388793945, + "learning_rate": 7.551020408163265e-06, + "loss": 1.8781, + "step": 250 + }, + { + "epoch": 0.6330390920554855, + "grad_norm": 25.715761184692383, + "learning_rate": 7.500000000000001e-06, + "loss": 1.8542, + "step": 251 + }, + { + "epoch": 0.6355611601513241, + "grad_norm": 22.562984466552734, + "learning_rate": 7.448979591836736e-06, + "loss": 1.7681, + "step": 252 + }, + { + "epoch": 0.6380832282471627, + "grad_norm": 20.540348052978516, + "learning_rate": 7.39795918367347e-06, + "loss": 1.9617, + "step": 253 + }, + { + "epoch": 0.6406052963430012, + "grad_norm": 24.610937118530273, + "learning_rate": 7.346938775510205e-06, + "loss": 1.9694, + "step": 254 + }, + { + "epoch": 0.6431273644388399, + "grad_norm": 27.93538475036621, + "learning_rate": 7.295918367346939e-06, + "loss": 1.8858, + "step": 255 + }, + { + "epoch": 0.6456494325346784, + "grad_norm": 31.466445922851562, + "learning_rate": 7.244897959183675e-06, + "loss": 1.8252, + "step": 256 + }, + { + "epoch": 0.648171500630517, + "grad_norm": 26.276226043701172, + "learning_rate": 7.193877551020409e-06, + "loss": 1.8865, + "step": 257 + }, + { + "epoch": 0.6506935687263556, + "grad_norm": 22.52095603942871, + "learning_rate": 7.1428571428571436e-06, + "loss": 1.7649, + "step": 258 + }, + { + "epoch": 0.6532156368221942, + "grad_norm": 20.15144157409668, + "learning_rate": 7.091836734693878e-06, + "loss": 2.0158, + "step": 259 + }, + { + "epoch": 0.6557377049180327, + "grad_norm": 26.405349731445312, + "learning_rate": 7.0408163265306125e-06, + "loss": 1.8932, + "step": 260 + }, + { + "epoch": 0.6582597730138714, + "grad_norm": 32.94384765625, + "learning_rate": 6.989795918367348e-06, + "loss": 1.7795, + "step": 261 + }, + { + "epoch": 0.6607818411097099, + "grad_norm": 23.109092712402344, + "learning_rate": 6.938775510204082e-06, + "loss": 1.8383, + "step": 262 + }, + { + "epoch": 0.6633039092055486, + "grad_norm": 21.75737190246582, + "learning_rate": 6.887755102040817e-06, + "loss": 1.8727, + "step": 263 + }, + { + "epoch": 0.6658259773013872, + "grad_norm": 22.96916389465332, + "learning_rate": 6.836734693877551e-06, + "loss": 1.8544, + "step": 264 + }, + { + "epoch": 0.6683480453972257, + "grad_norm": 25.62445640563965, + "learning_rate": 6.785714285714287e-06, + "loss": 1.7503, + "step": 265 + }, + { + "epoch": 0.6708701134930644, + "grad_norm": 25.430530548095703, + "learning_rate": 6.734693877551021e-06, + "loss": 1.7938, + "step": 266 + }, + { + "epoch": 0.6733921815889029, + "grad_norm": 26.462881088256836, + "learning_rate": 6.683673469387756e-06, + "loss": 1.8284, + "step": 267 + }, + { + "epoch": 0.6759142496847415, + "grad_norm": 31.45004653930664, + "learning_rate": 6.63265306122449e-06, + "loss": 2.0328, + "step": 268 + }, + { + "epoch": 0.6784363177805801, + "grad_norm": 30.525737762451172, + "learning_rate": 6.581632653061225e-06, + "loss": 1.8192, + "step": 269 + }, + { + "epoch": 0.6809583858764187, + "grad_norm": 25.705707550048828, + "learning_rate": 6.530612244897959e-06, + "loss": 1.8533, + "step": 270 + }, + { + "epoch": 0.6834804539722572, + "grad_norm": 39.90187454223633, + "learning_rate": 6.4795918367346946e-06, + "loss": 1.9483, + "step": 271 + }, + { + "epoch": 0.6860025220680959, + "grad_norm": 28.0180721282959, + "learning_rate": 6.4285714285714295e-06, + "loss": 1.8132, + "step": 272 + }, + { + "epoch": 0.6885245901639344, + "grad_norm": 34.821372985839844, + "learning_rate": 6.3775510204081635e-06, + "loss": 1.9599, + "step": 273 + }, + { + "epoch": 0.691046658259773, + "grad_norm": 24.018394470214844, + "learning_rate": 6.326530612244899e-06, + "loss": 1.9248, + "step": 274 + }, + { + "epoch": 0.6935687263556116, + "grad_norm": 24.074344635009766, + "learning_rate": 6.275510204081633e-06, + "loss": 2.0148, + "step": 275 + }, + { + "epoch": 0.6960907944514502, + "grad_norm": 31.1939754486084, + "learning_rate": 6.224489795918368e-06, + "loss": 1.8959, + "step": 276 + }, + { + "epoch": 0.6986128625472888, + "grad_norm": 25.481502532958984, + "learning_rate": 6.173469387755102e-06, + "loss": 1.9832, + "step": 277 + }, + { + "epoch": 0.7011349306431274, + "grad_norm": 29.6664981842041, + "learning_rate": 6.122448979591837e-06, + "loss": 1.9222, + "step": 278 + }, + { + "epoch": 0.7036569987389659, + "grad_norm": 26.30698585510254, + "learning_rate": 6.071428571428571e-06, + "loss": 1.9897, + "step": 279 + }, + { + "epoch": 0.7061790668348046, + "grad_norm": 31.827558517456055, + "learning_rate": 6.020408163265307e-06, + "loss": 1.8615, + "step": 280 + }, + { + "epoch": 0.7087011349306431, + "grad_norm": 24.80223846435547, + "learning_rate": 5.969387755102042e-06, + "loss": 1.9579, + "step": 281 + }, + { + "epoch": 0.7112232030264817, + "grad_norm": 36.134700775146484, + "learning_rate": 5.918367346938776e-06, + "loss": 1.723, + "step": 282 + }, + { + "epoch": 0.7137452711223203, + "grad_norm": 30.388233184814453, + "learning_rate": 5.867346938775511e-06, + "loss": 1.9736, + "step": 283 + }, + { + "epoch": 0.7162673392181589, + "grad_norm": 32.231563568115234, + "learning_rate": 5.816326530612246e-06, + "loss": 1.9228, + "step": 284 + }, + { + "epoch": 0.7187894073139974, + "grad_norm": 38.05869674682617, + "learning_rate": 5.7653061224489805e-06, + "loss": 1.9159, + "step": 285 + }, + { + "epoch": 0.7213114754098361, + "grad_norm": 27.256147384643555, + "learning_rate": 5.7142857142857145e-06, + "loss": 1.8072, + "step": 286 + }, + { + "epoch": 0.7238335435056746, + "grad_norm": 25.67181396484375, + "learning_rate": 5.663265306122449e-06, + "loss": 1.8633, + "step": 287 + }, + { + "epoch": 0.7263556116015133, + "grad_norm": 31.8681697845459, + "learning_rate": 5.6122448979591834e-06, + "loss": 1.9822, + "step": 288 + }, + { + "epoch": 0.7288776796973518, + "grad_norm": 32.85325241088867, + "learning_rate": 5.561224489795919e-06, + "loss": 1.8166, + "step": 289 + }, + { + "epoch": 0.7313997477931904, + "grad_norm": 35.64312744140625, + "learning_rate": 5.510204081632653e-06, + "loss": 1.7166, + "step": 290 + }, + { + "epoch": 0.733921815889029, + "grad_norm": 24.276235580444336, + "learning_rate": 5.459183673469388e-06, + "loss": 1.7593, + "step": 291 + }, + { + "epoch": 0.7364438839848676, + "grad_norm": 29.371950149536133, + "learning_rate": 5.408163265306123e-06, + "loss": 1.8124, + "step": 292 + }, + { + "epoch": 0.7389659520807061, + "grad_norm": 23.76220703125, + "learning_rate": 5.357142857142857e-06, + "loss": 1.7775, + "step": 293 + }, + { + "epoch": 0.7414880201765448, + "grad_norm": 37.103050231933594, + "learning_rate": 5.306122448979593e-06, + "loss": 1.9253, + "step": 294 + }, + { + "epoch": 0.7440100882723834, + "grad_norm": 20.0811767578125, + "learning_rate": 5.255102040816327e-06, + "loss": 1.8711, + "step": 295 + }, + { + "epoch": 0.7465321563682219, + "grad_norm": 35.33123016357422, + "learning_rate": 5.204081632653062e-06, + "loss": 1.8764, + "step": 296 + }, + { + "epoch": 0.7490542244640606, + "grad_norm": 31.880672454833984, + "learning_rate": 5.153061224489796e-06, + "loss": 1.8929, + "step": 297 + }, + { + "epoch": 0.7515762925598991, + "grad_norm": 21.682334899902344, + "learning_rate": 5.1020408163265315e-06, + "loss": 2.0377, + "step": 298 + }, + { + "epoch": 0.7540983606557377, + "grad_norm": 34.68608474731445, + "learning_rate": 5.0510204081632655e-06, + "loss": 1.9341, + "step": 299 + }, + { + "epoch": 0.7566204287515763, + "grad_norm": 25.59632110595703, + "learning_rate": 5e-06, + "loss": 1.8264, + "step": 300 + }, + { + "epoch": 0.7566204287515763, + "eval_loss": 1.8502724170684814, + "eval_runtime": 6.6208, + "eval_samples_per_second": 106.483, + "eval_steps_per_second": 53.317, + "step": 300 + }, + { + "epoch": 0.7591424968474149, + "grad_norm": 33.780616760253906, + "learning_rate": 4.948979591836735e-06, + "loss": 1.8315, + "step": 301 + }, + { + "epoch": 0.7616645649432535, + "grad_norm": 23.005069732666016, + "learning_rate": 4.897959183673469e-06, + "loss": 1.8434, + "step": 302 + }, + { + "epoch": 0.7641866330390921, + "grad_norm": 27.338787078857422, + "learning_rate": 4.846938775510204e-06, + "loss": 1.9102, + "step": 303 + }, + { + "epoch": 0.7667087011349306, + "grad_norm": 26.87493133544922, + "learning_rate": 4.795918367346939e-06, + "loss": 1.8408, + "step": 304 + }, + { + "epoch": 0.7692307692307693, + "grad_norm": 33.59117126464844, + "learning_rate": 4.744897959183674e-06, + "loss": 1.8633, + "step": 305 + }, + { + "epoch": 0.7717528373266078, + "grad_norm": 38.98092269897461, + "learning_rate": 4.693877551020409e-06, + "loss": 1.8187, + "step": 306 + }, + { + "epoch": 0.7742749054224464, + "grad_norm": 28.7203369140625, + "learning_rate": 4.642857142857144e-06, + "loss": 1.8425, + "step": 307 + }, + { + "epoch": 0.776796973518285, + "grad_norm": 30.91414451599121, + "learning_rate": 4.591836734693878e-06, + "loss": 1.8526, + "step": 308 + }, + { + "epoch": 0.7793190416141236, + "grad_norm": 29.04154396057129, + "learning_rate": 4.540816326530613e-06, + "loss": 1.8913, + "step": 309 + }, + { + "epoch": 0.7818411097099621, + "grad_norm": 29.638099670410156, + "learning_rate": 4.489795918367348e-06, + "loss": 1.8736, + "step": 310 + }, + { + "epoch": 0.7843631778058008, + "grad_norm": 21.630523681640625, + "learning_rate": 4.438775510204082e-06, + "loss": 1.7407, + "step": 311 + }, + { + "epoch": 0.7868852459016393, + "grad_norm": 28.39365577697754, + "learning_rate": 4.3877551020408165e-06, + "loss": 1.7503, + "step": 312 + }, + { + "epoch": 0.7894073139974779, + "grad_norm": 30.86245346069336, + "learning_rate": 4.336734693877551e-06, + "loss": 1.885, + "step": 313 + }, + { + "epoch": 0.7919293820933165, + "grad_norm": 20.29497718811035, + "learning_rate": 4.2857142857142855e-06, + "loss": 1.8725, + "step": 314 + }, + { + "epoch": 0.7944514501891551, + "grad_norm": 20.04092025756836, + "learning_rate": 4.234693877551021e-06, + "loss": 1.949, + "step": 315 + }, + { + "epoch": 0.7969735182849937, + "grad_norm": 26.88593101501465, + "learning_rate": 4.183673469387755e-06, + "loss": 1.8807, + "step": 316 + }, + { + "epoch": 0.7994955863808323, + "grad_norm": 28.06767463684082, + "learning_rate": 4.13265306122449e-06, + "loss": 1.9135, + "step": 317 + }, + { + "epoch": 0.8020176544766708, + "grad_norm": 35.69569778442383, + "learning_rate": 4.081632653061225e-06, + "loss": 1.8257, + "step": 318 + }, + { + "epoch": 0.8045397225725095, + "grad_norm": 29.278770446777344, + "learning_rate": 4.03061224489796e-06, + "loss": 1.8459, + "step": 319 + }, + { + "epoch": 0.807061790668348, + "grad_norm": 28.043975830078125, + "learning_rate": 3.979591836734694e-06, + "loss": 1.8821, + "step": 320 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4670017735606272.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-320/training_args.bin b/checkpoints/checkpoint-320/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..866f8d7e39db21daf30a11de7c14a3ccb8f2e9aa --- /dev/null +++ b/checkpoints/checkpoint-320/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:591380c2089627b1aa728bfd52abdb50afeaafc7a7f48353dede4e443fc370d0 +size 5969 diff --git a/checkpoints/checkpoint-330/README.md b/checkpoints/checkpoint-330/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b974a935220f493e52a3da346206a652479b4735 --- /dev/null +++ b/checkpoints/checkpoint-330/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.2-1B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-330/adapter_config.json b/checkpoints/checkpoint-330/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a0d31afa2216e2d6d2237fc15eef0c8c03ca674 --- /dev/null +++ b/checkpoints/checkpoint-330/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.2-1B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-330/adapter_model.safetensors b/checkpoints/checkpoint-330/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fb4329e5f17051d99fc64ded85e50c5d1bfc5e2a --- /dev/null +++ b/checkpoints/checkpoint-330/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75ca09d7e1f7e2afa26a2f4332355fe4dbc8d0ac6ae847b2b702c61d8020a6ca +size 41248 diff --git a/checkpoints/checkpoint-330/chat_template.jinja b/checkpoints/checkpoint-330/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..1bad6a0f648dccdbec523ca79ba90fbcfc806af0 --- /dev/null +++ b/checkpoints/checkpoint-330/chat_template.jinja @@ -0,0 +1,93 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- if strftime_now is defined %} + {%- set date_string = strftime_now("%d %b %Y") %} + {%- else %} + {%- set date_string = "26 Jul 2024" %} + {%- endif %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {{- "<|eot_id|>" }} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-330/optimizer.pt b/checkpoints/checkpoint-330/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e0dc03e1a46e6a876082f0e7bc33fc84b81b874a --- /dev/null +++ b/checkpoints/checkpoint-330/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b436dee34b54bf2eeba7a3befdf8e0f10e06b464edf320ccb2a2fc54632618f +size 38953 diff --git a/checkpoints/checkpoint-330/rng_state.pth b/checkpoints/checkpoint-330/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c9d202f3cabb995444284e316dca48902b67f89a --- /dev/null +++ b/checkpoints/checkpoint-330/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f646d2c44059721e44eb93fd2af0841401f183d6ff9b2e067649f1f58cce0f09 +size 14581 diff --git a/checkpoints/checkpoint-330/scheduler.pt b/checkpoints/checkpoint-330/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b7a1604493ce6e99ffb1acb83c2248df4cbcd4a0 --- /dev/null +++ b/checkpoints/checkpoint-330/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd5be53248e4bd911cc8e7dd283e81be626be8a5c09afd82a1589487aa97e6bc +size 1465 diff --git a/checkpoints/checkpoint-330/special_tokens_map.json b/checkpoints/checkpoint-330/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-330/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-330/tokenizer.json b/checkpoints/checkpoint-330/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-330/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-330/tokenizer_config.json b/checkpoints/checkpoint-330/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-330/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-330/trainer_state.json b/checkpoints/checkpoint-330/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..76fa3ed47e0f7753e46a854d725a0a0631dd2cc5 --- /dev/null +++ b/checkpoints/checkpoint-330/trainer_state.json @@ -0,0 +1,2368 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.832282471626734, + "eval_steps": 100, + "global_step": 330, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 25.350475311279297, + "learning_rate": 0.0, + "loss": 2.5568, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 24.538068771362305, + "learning_rate": 4.000000000000001e-06, + "loss": 2.7748, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 23.780784606933594, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5911, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 24.780380249023438, + "learning_rate": 1.2e-05, + "loss": 2.8427, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 22.699949264526367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.709, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 22.106008529663086, + "learning_rate": 2e-05, + "loss": 2.5854, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 22.497045516967773, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.5427, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 27.103275299072266, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.6599, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 21.081985473632812, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.5145, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 25.964981079101562, + "learning_rate": 1.979591836734694e-05, + "loss": 2.4247, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 25.353195190429688, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.5092, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 18.94191551208496, + "learning_rate": 1.969387755102041e-05, + "loss": 2.4335, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 23.60140037536621, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.544, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 24.298965454101562, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.4987, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.745506286621094, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.4985, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 22.54330062866211, + "learning_rate": 1.948979591836735e-05, + "loss": 2.6892, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 21.46229362487793, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.3998, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 20.54530143737793, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.4244, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 18.8839111328125, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.3911, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.924652099609375, + "learning_rate": 1.928571428571429e-05, + "loss": 2.3588, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.996627807617188, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.3727, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 18.584613800048828, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2974, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 14.309200286865234, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.4843, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.074164390563965, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.4043, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 13.610542297363281, + "learning_rate": 1.903061224489796e-05, + "loss": 2.3762, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 15.666613578796387, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.3249, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 14.475164413452148, + "learning_rate": 1.892857142857143e-05, + "loss": 2.3317, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 16.231687545776367, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.5064, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 16.8968563079834, + "learning_rate": 1.88265306122449e-05, + "loss": 2.3932, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 17.74305534362793, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.3329, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 16.41620445251465, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.3982, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 17.965959548950195, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.4227, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 19.92589569091797, + "learning_rate": 1.862244897959184e-05, + "loss": 2.5255, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 20.62932586669922, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.1816, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 18.360614776611328, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.2827, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 19.199546813964844, + "learning_rate": 1.8469387755102043e-05, + "loss": 2.1498, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 22.727521896362305, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.3811, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 19.80649757385254, + "learning_rate": 1.836734693877551e-05, + "loss": 2.2342, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.24563217163086, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.2287, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 25.384042739868164, + "learning_rate": 1.826530612244898e-05, + "loss": 2.1259, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 23.417089462280273, + "learning_rate": 1.8214285714285715e-05, + "loss": 2.0858, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 27.639497756958008, + "learning_rate": 1.816326530612245e-05, + "loss": 2.2243, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 27.390850067138672, + "learning_rate": 1.8112244897959187e-05, + "loss": 2.1314, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 27.956937789916992, + "learning_rate": 1.806122448979592e-05, + "loss": 2.1755, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 32.09632873535156, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.2365, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 33.84647750854492, + "learning_rate": 1.795918367346939e-05, + "loss": 2.1671, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 32.027130126953125, + "learning_rate": 1.7908163265306123e-05, + "loss": 2.09, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 35.423587799072266, + "learning_rate": 1.785714285714286e-05, + "loss": 2.2479, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 31.041240692138672, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9687, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 28.790103912353516, + "learning_rate": 1.7755102040816327e-05, + "loss": 2.1428, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.089313507080078, + "learning_rate": 1.7704081632653062e-05, + "loss": 2.0673, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 26.493867874145508, + "learning_rate": 1.7653061224489798e-05, + "loss": 2.0814, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 19.993173599243164, + "learning_rate": 1.760204081632653e-05, + "loss": 2.005, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 21.89765167236328, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.2262, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 23.22844123840332, + "learning_rate": 1.7500000000000002e-05, + "loss": 2.0208, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 15.864526748657227, + "learning_rate": 1.7448979591836738e-05, + "loss": 2.0153, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 21.451187133789062, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.1386, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 18.089811325073242, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.9517, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 24.029157638549805, + "learning_rate": 1.729591836734694e-05, + "loss": 1.9719, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 18.722776412963867, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0623, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 20.211933135986328, + "learning_rate": 1.719387755102041e-05, + "loss": 2.0081, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 17.61188507080078, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.8484, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 20.118955612182617, + "learning_rate": 1.7091836734693878e-05, + "loss": 2.0799, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 17.271841049194336, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.9832, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 19.521392822265625, + "learning_rate": 1.698979591836735e-05, + "loss": 1.9129, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 22.660900115966797, + "learning_rate": 1.6938775510204085e-05, + "loss": 2.118, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 17.332427978515625, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.9632, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 22.42765998840332, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.954, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 23.6208553314209, + "learning_rate": 1.678571428571429e-05, + "loss": 1.9917, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 19.78505516052246, + "learning_rate": 1.673469387755102e-05, + "loss": 1.7964, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 19.453041076660156, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.9587, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 24.731407165527344, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.9945, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 20.977611541748047, + "learning_rate": 1.6581632653061225e-05, + "loss": 2.0617, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 22.959585189819336, + "learning_rate": 1.653061224489796e-05, + "loss": 1.98, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 21.952653884887695, + "learning_rate": 1.6479591836734696e-05, + "loss": 2.1094, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 22.320383071899414, + "learning_rate": 1.642857142857143e-05, + "loss": 1.8418, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 24.375411987304688, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.8428, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 19.64323616027832, + "learning_rate": 1.63265306122449e-05, + "loss": 1.9194, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 22.459064483642578, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.6649, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 36.789764404296875, + "learning_rate": 1.6224489795918368e-05, + "loss": 2.0131, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 22.109119415283203, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.9603, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 19.196834564208984, + "learning_rate": 1.612244897959184e-05, + "loss": 2.0538, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 26.870800018310547, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.9168, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 35.190696716308594, + "learning_rate": 1.6020408163265308e-05, + "loss": 2.0149, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 19.963472366333008, + "learning_rate": 1.596938775510204e-05, + "loss": 1.7871, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 20.292407989501953, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.944, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 20.55329132080078, + "learning_rate": 1.586734693877551e-05, + "loss": 2.0175, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 17.27350616455078, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.9308, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 22.471134185791016, + "learning_rate": 1.576530612244898e-05, + "loss": 1.9221, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 25.098316192626953, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.9359, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 25.125213623046875, + "learning_rate": 1.566326530612245e-05, + "loss": 2.0087, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 20.038599014282227, + "learning_rate": 1.5612244897959187e-05, + "loss": 2.0939, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 19.016841888427734, + "learning_rate": 1.556122448979592e-05, + "loss": 2.0183, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 21.97820472717285, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.8239, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 25.578901290893555, + "learning_rate": 1.545918367346939e-05, + "loss": 1.9388, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 23.74614143371582, + "learning_rate": 1.5408163265306123e-05, + "loss": 2.0492, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 22.203304290771484, + "learning_rate": 1.535714285714286e-05, + "loss": 1.941, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 21.39324188232422, + "learning_rate": 1.530612244897959e-05, + "loss": 1.9042, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 18.99315643310547, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.88, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 24.22341537475586, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.8147, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.8966256380081177, + "eval_runtime": 6.9787, + "eval_samples_per_second": 101.022, + "eval_steps_per_second": 50.583, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 18.296152114868164, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.8605, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 26.404766082763672, + "learning_rate": 1.510204081632653e-05, + "loss": 2.1195, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 19.187122344970703, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.9284, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 20.79934310913086, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.725, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 23.833288192749023, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.9215, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 22.301727294921875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.8728, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 23.685596466064453, + "learning_rate": 1.4846938775510204e-05, + "loss": 2.0482, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 18.969186782836914, + "learning_rate": 1.479591836734694e-05, + "loss": 1.8724, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 23.994483947753906, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.9542, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 16.84621238708496, + "learning_rate": 1.469387755102041e-05, + "loss": 1.9703, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 23.411087036132812, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.9836, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 29.55487632751465, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.9124, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 32.28921127319336, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.8566, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 24.007558822631836, + "learning_rate": 1.448979591836735e-05, + "loss": 1.8296, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 26.753524780273438, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.7181, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 22.49270248413086, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.8741, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 28.006656646728516, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.9151, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 17.606775283813477, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.9654, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 29.94802474975586, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.8849, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 28.27743148803711, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.8006, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 19.11652183532715, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.8539, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 24.255807876586914, + "learning_rate": 1.4081632653061225e-05, + "loss": 2.0162, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 22.508352279663086, + "learning_rate": 1.403061224489796e-05, + "loss": 1.858, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 27.028772354125977, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.8175, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 22.697704315185547, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.9789, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 31.604068756103516, + "learning_rate": 1.3877551020408165e-05, + "loss": 2.0039, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 27.71053695678711, + "learning_rate": 1.38265306122449e-05, + "loss": 1.9867, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 17.37586784362793, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.6931, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 20.28536605834961, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.9199, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 29.4377384185791, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.9322, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 17.703046798706055, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.8842, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 30.14008140563965, + "learning_rate": 1.3571428571428574e-05, + "loss": 2.1228, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 29.657262802124023, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.8929, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 18.243854522705078, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.8811, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 33.22247314453125, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.9814, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 26.413856506347656, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.9329, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 20.56089210510254, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.8944, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 19.480737686157227, + "learning_rate": 1.326530612244898e-05, + "loss": 1.9221, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 22.788074493408203, + "learning_rate": 1.3214285714285716e-05, + "loss": 2.0445, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 20.722291946411133, + "learning_rate": 1.316326530612245e-05, + "loss": 1.9998, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 25.190189361572266, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.8076, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 23.203886032104492, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.7821, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 25.32374382019043, + "learning_rate": 1.3010204081632653e-05, + "loss": 2.0356, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 28.798864364624023, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.8202, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 24.93810272216797, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.9237, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 36.78353500366211, + "learning_rate": 1.2857142857142859e-05, + "loss": 2.0019, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 28.510663986206055, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.9268, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 38.19087219238281, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.9366, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 20.796728134155273, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.8731, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 23.036758422851562, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.8835, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 27.058195114135742, + "learning_rate": 1.260204081632653e-05, + "loss": 1.8013, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 25.390460968017578, + "learning_rate": 1.2551020408163267e-05, + "loss": 2.0623, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 27.993654251098633, + "learning_rate": 1.25e-05, + "loss": 1.6895, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 24.15807342529297, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.9799, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 24.369815826416016, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.9687, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 24.572988510131836, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.8607, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 20.491390228271484, + "learning_rate": 1.229591836734694e-05, + "loss": 2.0677, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 25.128101348876953, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.9233, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 18.843276977539062, + "learning_rate": 1.219387755102041e-05, + "loss": 1.781, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 24.99994659423828, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.962, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 20.679218292236328, + "learning_rate": 1.2091836734693878e-05, + "loss": 2.0055, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 26.00550651550293, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.9761, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 33.80900192260742, + "learning_rate": 1.1989795918367348e-05, + "loss": 2.0502, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 25.639009475708008, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.9088, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 17.48627471923828, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.9359, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 23.16074562072754, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.8647, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 25.39946174621582, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.8523, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 25.8050537109375, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.8403, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 20.019033432006836, + "learning_rate": 1.1683673469387755e-05, + "loss": 2.0023, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 26.194847106933594, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.9429, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 21.064212799072266, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.8302, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 21.876129150390625, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.8881, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 33.61103439331055, + "learning_rate": 1.1479591836734697e-05, + "loss": 2.0497, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 27.204744338989258, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.8431, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 21.605751037597656, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.9149, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 30.307472229003906, + "learning_rate": 1.1326530612244899e-05, + "loss": 2.0313, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 23.69244384765625, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.8676, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 25.619901657104492, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.7905, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 28.0296573638916, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.8567, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 36.4359130859375, + "learning_rate": 1.1122448979591838e-05, + "loss": 2.115, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 26.91726303100586, + "learning_rate": 1.1071428571428572e-05, + "loss": 2.0642, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 23.085880279541016, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.9109, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 27.870641708374023, + "learning_rate": 1.096938775510204e-05, + "loss": 1.8999, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 32.0672607421875, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.904, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 28.879365921020508, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.7159, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 27.592771530151367, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.8561, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 27.412763595581055, + "learning_rate": 1.076530612244898e-05, + "loss": 1.9282, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 30.12356185913086, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.8726, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 39.9027214050293, + "learning_rate": 1.066326530612245e-05, + "loss": 1.7551, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 30.483945846557617, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.7643, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 26.00415802001953, + "learning_rate": 1.0561224489795918e-05, + "loss": 2.0552, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 23.03282356262207, + "learning_rate": 1.0510204081632654e-05, + "loss": 2.0052, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 33.653221130371094, + "learning_rate": 1.045918367346939e-05, + "loss": 1.7367, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 39.59351348876953, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.9726, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.77714920043945, + "learning_rate": 1.0357142857142859e-05, + "loss": 2.0906, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 33.194549560546875, + "learning_rate": 1.0306122448979591e-05, + "loss": 2.0742, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 25.10793685913086, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.9204, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 40.048404693603516, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.7775, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 26.085933685302734, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.9459, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 18.375, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.9536, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.8626214265823364, + "eval_runtime": 6.6508, + "eval_samples_per_second": 106.002, + "eval_steps_per_second": 53.076, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 33.858341217041016, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.8191, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 22.895992279052734, + "learning_rate": 1e-05, + "loss": 2.0596, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 30.55072593688965, + "learning_rate": 9.948979591836737e-06, + "loss": 1.8904, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 26.542705535888672, + "learning_rate": 9.89795918367347e-06, + "loss": 1.9683, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 39.81034851074219, + "learning_rate": 9.846938775510205e-06, + "loss": 1.9726, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 22.0065860748291, + "learning_rate": 9.795918367346939e-06, + "loss": 2.0575, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 19.012041091918945, + "learning_rate": 9.744897959183674e-06, + "loss": 1.8431, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 39.699974060058594, + "learning_rate": 9.693877551020408e-06, + "loss": 1.8231, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 21.391319274902344, + "learning_rate": 9.642857142857144e-06, + "loss": 1.7939, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 25.8063907623291, + "learning_rate": 9.591836734693878e-06, + "loss": 2.0366, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 20.598569869995117, + "learning_rate": 9.540816326530612e-06, + "loss": 1.8323, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 29.391401290893555, + "learning_rate": 9.489795918367348e-06, + "loss": 2.0052, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 24.39499855041504, + "learning_rate": 9.438775510204082e-06, + "loss": 1.8461, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 24.16887092590332, + "learning_rate": 9.387755102040818e-06, + "loss": 1.9404, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 24.577871322631836, + "learning_rate": 9.336734693877552e-06, + "loss": 1.9202, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 26.117361068725586, + "learning_rate": 9.285714285714288e-06, + "loss": 1.921, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 22.586837768554688, + "learning_rate": 9.234693877551022e-06, + "loss": 1.9692, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 18.438722610473633, + "learning_rate": 9.183673469387756e-06, + "loss": 1.9496, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 22.94545555114746, + "learning_rate": 9.13265306122449e-06, + "loss": 1.991, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 28.664562225341797, + "learning_rate": 9.081632653061225e-06, + "loss": 1.8352, + "step": 220 + }, + { + "epoch": 0.5573770491803278, + "grad_norm": 25.63576316833496, + "learning_rate": 9.03061224489796e-06, + "loss": 1.9399, + "step": 221 + }, + { + "epoch": 0.5598991172761665, + "grad_norm": 21.650251388549805, + "learning_rate": 8.979591836734695e-06, + "loss": 1.9565, + "step": 222 + }, + { + "epoch": 0.562421185372005, + "grad_norm": 29.605735778808594, + "learning_rate": 8.92857142857143e-06, + "loss": 1.729, + "step": 223 + }, + { + "epoch": 0.5649432534678437, + "grad_norm": 23.98230743408203, + "learning_rate": 8.877551020408163e-06, + "loss": 1.9399, + "step": 224 + }, + { + "epoch": 0.5674653215636822, + "grad_norm": 20.37510108947754, + "learning_rate": 8.826530612244899e-06, + "loss": 1.7322, + "step": 225 + }, + { + "epoch": 0.5699873896595208, + "grad_norm": 25.876188278198242, + "learning_rate": 8.775510204081633e-06, + "loss": 1.9444, + "step": 226 + }, + { + "epoch": 0.5725094577553594, + "grad_norm": 32.07249069213867, + "learning_rate": 8.724489795918369e-06, + "loss": 1.9364, + "step": 227 + }, + { + "epoch": 0.575031525851198, + "grad_norm": 28.014524459838867, + "learning_rate": 8.673469387755103e-06, + "loss": 1.757, + "step": 228 + }, + { + "epoch": 0.5775535939470365, + "grad_norm": 30.82647132873535, + "learning_rate": 8.622448979591837e-06, + "loss": 1.9067, + "step": 229 + }, + { + "epoch": 0.5800756620428752, + "grad_norm": 30.651660919189453, + "learning_rate": 8.571428571428571e-06, + "loss": 1.9906, + "step": 230 + }, + { + "epoch": 0.5825977301387137, + "grad_norm": 25.239904403686523, + "learning_rate": 8.520408163265307e-06, + "loss": 1.8914, + "step": 231 + }, + { + "epoch": 0.5851197982345523, + "grad_norm": 21.33747673034668, + "learning_rate": 8.469387755102042e-06, + "loss": 1.9999, + "step": 232 + }, + { + "epoch": 0.587641866330391, + "grad_norm": 25.255064010620117, + "learning_rate": 8.418367346938776e-06, + "loss": 1.8941, + "step": 233 + }, + { + "epoch": 0.5901639344262295, + "grad_norm": 24.443973541259766, + "learning_rate": 8.36734693877551e-06, + "loss": 1.7679, + "step": 234 + }, + { + "epoch": 0.592686002522068, + "grad_norm": 25.473894119262695, + "learning_rate": 8.316326530612246e-06, + "loss": 1.7876, + "step": 235 + }, + { + "epoch": 0.5952080706179067, + "grad_norm": 26.28467559814453, + "learning_rate": 8.26530612244898e-06, + "loss": 1.6761, + "step": 236 + }, + { + "epoch": 0.5977301387137453, + "grad_norm": 24.488052368164062, + "learning_rate": 8.214285714285714e-06, + "loss": 2.0022, + "step": 237 + }, + { + "epoch": 0.6002522068095839, + "grad_norm": 30.074064254760742, + "learning_rate": 8.16326530612245e-06, + "loss": 1.7747, + "step": 238 + }, + { + "epoch": 0.6027742749054225, + "grad_norm": 23.73440170288086, + "learning_rate": 8.112244897959184e-06, + "loss": 1.8468, + "step": 239 + }, + { + "epoch": 0.605296343001261, + "grad_norm": 22.338869094848633, + "learning_rate": 8.06122448979592e-06, + "loss": 1.8611, + "step": 240 + }, + { + "epoch": 0.6078184110970997, + "grad_norm": 24.844266891479492, + "learning_rate": 8.010204081632654e-06, + "loss": 1.9285, + "step": 241 + }, + { + "epoch": 0.6103404791929382, + "grad_norm": 29.65668487548828, + "learning_rate": 7.959183673469388e-06, + "loss": 1.935, + "step": 242 + }, + { + "epoch": 0.6128625472887768, + "grad_norm": 26.01723289489746, + "learning_rate": 7.908163265306124e-06, + "loss": 1.7587, + "step": 243 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 27.04817771911621, + "learning_rate": 7.857142857142858e-06, + "loss": 1.8878, + "step": 244 + }, + { + "epoch": 0.617906683480454, + "grad_norm": 36.23786163330078, + "learning_rate": 7.806122448979593e-06, + "loss": 1.992, + "step": 245 + }, + { + "epoch": 0.6204287515762925, + "grad_norm": 19.283294677734375, + "learning_rate": 7.755102040816327e-06, + "loss": 1.8066, + "step": 246 + }, + { + "epoch": 0.6229508196721312, + "grad_norm": 24.24143409729004, + "learning_rate": 7.704081632653061e-06, + "loss": 1.8899, + "step": 247 + }, + { + "epoch": 0.6254728877679697, + "grad_norm": 25.59832763671875, + "learning_rate": 7.653061224489796e-06, + "loss": 1.9601, + "step": 248 + }, + { + "epoch": 0.6279949558638083, + "grad_norm": 27.195640563964844, + "learning_rate": 7.602040816326531e-06, + "loss": 1.9561, + "step": 249 + }, + { + "epoch": 0.6305170239596469, + "grad_norm": 27.854570388793945, + "learning_rate": 7.551020408163265e-06, + "loss": 1.8781, + "step": 250 + }, + { + "epoch": 0.6330390920554855, + "grad_norm": 25.715761184692383, + "learning_rate": 7.500000000000001e-06, + "loss": 1.8542, + "step": 251 + }, + { + "epoch": 0.6355611601513241, + "grad_norm": 22.562984466552734, + "learning_rate": 7.448979591836736e-06, + "loss": 1.7681, + "step": 252 + }, + { + "epoch": 0.6380832282471627, + "grad_norm": 20.540348052978516, + "learning_rate": 7.39795918367347e-06, + "loss": 1.9617, + "step": 253 + }, + { + "epoch": 0.6406052963430012, + "grad_norm": 24.610937118530273, + "learning_rate": 7.346938775510205e-06, + "loss": 1.9694, + "step": 254 + }, + { + "epoch": 0.6431273644388399, + "grad_norm": 27.93538475036621, + "learning_rate": 7.295918367346939e-06, + "loss": 1.8858, + "step": 255 + }, + { + "epoch": 0.6456494325346784, + "grad_norm": 31.466445922851562, + "learning_rate": 7.244897959183675e-06, + "loss": 1.8252, + "step": 256 + }, + { + "epoch": 0.648171500630517, + "grad_norm": 26.276226043701172, + "learning_rate": 7.193877551020409e-06, + "loss": 1.8865, + "step": 257 + }, + { + "epoch": 0.6506935687263556, + "grad_norm": 22.52095603942871, + "learning_rate": 7.1428571428571436e-06, + "loss": 1.7649, + "step": 258 + }, + { + "epoch": 0.6532156368221942, + "grad_norm": 20.15144157409668, + "learning_rate": 7.091836734693878e-06, + "loss": 2.0158, + "step": 259 + }, + { + "epoch": 0.6557377049180327, + "grad_norm": 26.405349731445312, + "learning_rate": 7.0408163265306125e-06, + "loss": 1.8932, + "step": 260 + }, + { + "epoch": 0.6582597730138714, + "grad_norm": 32.94384765625, + "learning_rate": 6.989795918367348e-06, + "loss": 1.7795, + "step": 261 + }, + { + "epoch": 0.6607818411097099, + "grad_norm": 23.109092712402344, + "learning_rate": 6.938775510204082e-06, + "loss": 1.8383, + "step": 262 + }, + { + "epoch": 0.6633039092055486, + "grad_norm": 21.75737190246582, + "learning_rate": 6.887755102040817e-06, + "loss": 1.8727, + "step": 263 + }, + { + "epoch": 0.6658259773013872, + "grad_norm": 22.96916389465332, + "learning_rate": 6.836734693877551e-06, + "loss": 1.8544, + "step": 264 + }, + { + "epoch": 0.6683480453972257, + "grad_norm": 25.62445640563965, + "learning_rate": 6.785714285714287e-06, + "loss": 1.7503, + "step": 265 + }, + { + "epoch": 0.6708701134930644, + "grad_norm": 25.430530548095703, + "learning_rate": 6.734693877551021e-06, + "loss": 1.7938, + "step": 266 + }, + { + "epoch": 0.6733921815889029, + "grad_norm": 26.462881088256836, + "learning_rate": 6.683673469387756e-06, + "loss": 1.8284, + "step": 267 + }, + { + "epoch": 0.6759142496847415, + "grad_norm": 31.45004653930664, + "learning_rate": 6.63265306122449e-06, + "loss": 2.0328, + "step": 268 + }, + { + "epoch": 0.6784363177805801, + "grad_norm": 30.525737762451172, + "learning_rate": 6.581632653061225e-06, + "loss": 1.8192, + "step": 269 + }, + { + "epoch": 0.6809583858764187, + "grad_norm": 25.705707550048828, + "learning_rate": 6.530612244897959e-06, + "loss": 1.8533, + "step": 270 + }, + { + "epoch": 0.6834804539722572, + "grad_norm": 39.90187454223633, + "learning_rate": 6.4795918367346946e-06, + "loss": 1.9483, + "step": 271 + }, + { + "epoch": 0.6860025220680959, + "grad_norm": 28.0180721282959, + "learning_rate": 6.4285714285714295e-06, + "loss": 1.8132, + "step": 272 + }, + { + "epoch": 0.6885245901639344, + "grad_norm": 34.821372985839844, + "learning_rate": 6.3775510204081635e-06, + "loss": 1.9599, + "step": 273 + }, + { + "epoch": 0.691046658259773, + "grad_norm": 24.018394470214844, + "learning_rate": 6.326530612244899e-06, + "loss": 1.9248, + "step": 274 + }, + { + "epoch": 0.6935687263556116, + "grad_norm": 24.074344635009766, + "learning_rate": 6.275510204081633e-06, + "loss": 2.0148, + "step": 275 + }, + { + "epoch": 0.6960907944514502, + "grad_norm": 31.1939754486084, + "learning_rate": 6.224489795918368e-06, + "loss": 1.8959, + "step": 276 + }, + { + "epoch": 0.6986128625472888, + "grad_norm": 25.481502532958984, + "learning_rate": 6.173469387755102e-06, + "loss": 1.9832, + "step": 277 + }, + { + "epoch": 0.7011349306431274, + "grad_norm": 29.6664981842041, + "learning_rate": 6.122448979591837e-06, + "loss": 1.9222, + "step": 278 + }, + { + "epoch": 0.7036569987389659, + "grad_norm": 26.30698585510254, + "learning_rate": 6.071428571428571e-06, + "loss": 1.9897, + "step": 279 + }, + { + "epoch": 0.7061790668348046, + "grad_norm": 31.827558517456055, + "learning_rate": 6.020408163265307e-06, + "loss": 1.8615, + "step": 280 + }, + { + "epoch": 0.7087011349306431, + "grad_norm": 24.80223846435547, + "learning_rate": 5.969387755102042e-06, + "loss": 1.9579, + "step": 281 + }, + { + "epoch": 0.7112232030264817, + "grad_norm": 36.134700775146484, + "learning_rate": 5.918367346938776e-06, + "loss": 1.723, + "step": 282 + }, + { + "epoch": 0.7137452711223203, + "grad_norm": 30.388233184814453, + "learning_rate": 5.867346938775511e-06, + "loss": 1.9736, + "step": 283 + }, + { + "epoch": 0.7162673392181589, + "grad_norm": 32.231563568115234, + "learning_rate": 5.816326530612246e-06, + "loss": 1.9228, + "step": 284 + }, + { + "epoch": 0.7187894073139974, + "grad_norm": 38.05869674682617, + "learning_rate": 5.7653061224489805e-06, + "loss": 1.9159, + "step": 285 + }, + { + "epoch": 0.7213114754098361, + "grad_norm": 27.256147384643555, + "learning_rate": 5.7142857142857145e-06, + "loss": 1.8072, + "step": 286 + }, + { + "epoch": 0.7238335435056746, + "grad_norm": 25.67181396484375, + "learning_rate": 5.663265306122449e-06, + "loss": 1.8633, + "step": 287 + }, + { + "epoch": 0.7263556116015133, + "grad_norm": 31.8681697845459, + "learning_rate": 5.6122448979591834e-06, + "loss": 1.9822, + "step": 288 + }, + { + "epoch": 0.7288776796973518, + "grad_norm": 32.85325241088867, + "learning_rate": 5.561224489795919e-06, + "loss": 1.8166, + "step": 289 + }, + { + "epoch": 0.7313997477931904, + "grad_norm": 35.64312744140625, + "learning_rate": 5.510204081632653e-06, + "loss": 1.7166, + "step": 290 + }, + { + "epoch": 0.733921815889029, + "grad_norm": 24.276235580444336, + "learning_rate": 5.459183673469388e-06, + "loss": 1.7593, + "step": 291 + }, + { + "epoch": 0.7364438839848676, + "grad_norm": 29.371950149536133, + "learning_rate": 5.408163265306123e-06, + "loss": 1.8124, + "step": 292 + }, + { + "epoch": 0.7389659520807061, + "grad_norm": 23.76220703125, + "learning_rate": 5.357142857142857e-06, + "loss": 1.7775, + "step": 293 + }, + { + "epoch": 0.7414880201765448, + "grad_norm": 37.103050231933594, + "learning_rate": 5.306122448979593e-06, + "loss": 1.9253, + "step": 294 + }, + { + "epoch": 0.7440100882723834, + "grad_norm": 20.0811767578125, + "learning_rate": 5.255102040816327e-06, + "loss": 1.8711, + "step": 295 + }, + { + "epoch": 0.7465321563682219, + "grad_norm": 35.33123016357422, + "learning_rate": 5.204081632653062e-06, + "loss": 1.8764, + "step": 296 + }, + { + "epoch": 0.7490542244640606, + "grad_norm": 31.880672454833984, + "learning_rate": 5.153061224489796e-06, + "loss": 1.8929, + "step": 297 + }, + { + "epoch": 0.7515762925598991, + "grad_norm": 21.682334899902344, + "learning_rate": 5.1020408163265315e-06, + "loss": 2.0377, + "step": 298 + }, + { + "epoch": 0.7540983606557377, + "grad_norm": 34.68608474731445, + "learning_rate": 5.0510204081632655e-06, + "loss": 1.9341, + "step": 299 + }, + { + "epoch": 0.7566204287515763, + "grad_norm": 25.59632110595703, + "learning_rate": 5e-06, + "loss": 1.8264, + "step": 300 + }, + { + "epoch": 0.7566204287515763, + "eval_loss": 1.8502724170684814, + "eval_runtime": 6.6208, + "eval_samples_per_second": 106.483, + "eval_steps_per_second": 53.317, + "step": 300 + }, + { + "epoch": 0.7591424968474149, + "grad_norm": 33.780616760253906, + "learning_rate": 4.948979591836735e-06, + "loss": 1.8315, + "step": 301 + }, + { + "epoch": 0.7616645649432535, + "grad_norm": 23.005069732666016, + "learning_rate": 4.897959183673469e-06, + "loss": 1.8434, + "step": 302 + }, + { + "epoch": 0.7641866330390921, + "grad_norm": 27.338787078857422, + "learning_rate": 4.846938775510204e-06, + "loss": 1.9102, + "step": 303 + }, + { + "epoch": 0.7667087011349306, + "grad_norm": 26.87493133544922, + "learning_rate": 4.795918367346939e-06, + "loss": 1.8408, + "step": 304 + }, + { + "epoch": 0.7692307692307693, + "grad_norm": 33.59117126464844, + "learning_rate": 4.744897959183674e-06, + "loss": 1.8633, + "step": 305 + }, + { + "epoch": 0.7717528373266078, + "grad_norm": 38.98092269897461, + "learning_rate": 4.693877551020409e-06, + "loss": 1.8187, + "step": 306 + }, + { + "epoch": 0.7742749054224464, + "grad_norm": 28.7203369140625, + "learning_rate": 4.642857142857144e-06, + "loss": 1.8425, + "step": 307 + }, + { + "epoch": 0.776796973518285, + "grad_norm": 30.91414451599121, + "learning_rate": 4.591836734693878e-06, + "loss": 1.8526, + "step": 308 + }, + { + "epoch": 0.7793190416141236, + "grad_norm": 29.04154396057129, + "learning_rate": 4.540816326530613e-06, + "loss": 1.8913, + "step": 309 + }, + { + "epoch": 0.7818411097099621, + "grad_norm": 29.638099670410156, + "learning_rate": 4.489795918367348e-06, + "loss": 1.8736, + "step": 310 + }, + { + "epoch": 0.7843631778058008, + "grad_norm": 21.630523681640625, + "learning_rate": 4.438775510204082e-06, + "loss": 1.7407, + "step": 311 + }, + { + "epoch": 0.7868852459016393, + "grad_norm": 28.39365577697754, + "learning_rate": 4.3877551020408165e-06, + "loss": 1.7503, + "step": 312 + }, + { + "epoch": 0.7894073139974779, + "grad_norm": 30.86245346069336, + "learning_rate": 4.336734693877551e-06, + "loss": 1.885, + "step": 313 + }, + { + "epoch": 0.7919293820933165, + "grad_norm": 20.29497718811035, + "learning_rate": 4.2857142857142855e-06, + "loss": 1.8725, + "step": 314 + }, + { + "epoch": 0.7944514501891551, + "grad_norm": 20.04092025756836, + "learning_rate": 4.234693877551021e-06, + "loss": 1.949, + "step": 315 + }, + { + "epoch": 0.7969735182849937, + "grad_norm": 26.88593101501465, + "learning_rate": 4.183673469387755e-06, + "loss": 1.8807, + "step": 316 + }, + { + "epoch": 0.7994955863808323, + "grad_norm": 28.06767463684082, + "learning_rate": 4.13265306122449e-06, + "loss": 1.9135, + "step": 317 + }, + { + "epoch": 0.8020176544766708, + "grad_norm": 35.69569778442383, + "learning_rate": 4.081632653061225e-06, + "loss": 1.8257, + "step": 318 + }, + { + "epoch": 0.8045397225725095, + "grad_norm": 29.278770446777344, + "learning_rate": 4.03061224489796e-06, + "loss": 1.8459, + "step": 319 + }, + { + "epoch": 0.807061790668348, + "grad_norm": 28.043975830078125, + "learning_rate": 3.979591836734694e-06, + "loss": 1.8821, + "step": 320 + }, + { + "epoch": 0.8095838587641866, + "grad_norm": 24.060317993164062, + "learning_rate": 3.928571428571429e-06, + "loss": 1.853, + "step": 321 + }, + { + "epoch": 0.8121059268600253, + "grad_norm": 23.929243087768555, + "learning_rate": 3.877551020408164e-06, + "loss": 1.9249, + "step": 322 + }, + { + "epoch": 0.8146279949558638, + "grad_norm": 37.94782257080078, + "learning_rate": 3.826530612244898e-06, + "loss": 1.9666, + "step": 323 + }, + { + "epoch": 0.8171500630517023, + "grad_norm": 26.99836540222168, + "learning_rate": 3.7755102040816327e-06, + "loss": 1.8464, + "step": 324 + }, + { + "epoch": 0.819672131147541, + "grad_norm": 26.87177085876465, + "learning_rate": 3.724489795918368e-06, + "loss": 1.9178, + "step": 325 + }, + { + "epoch": 0.8221941992433796, + "grad_norm": 36.669212341308594, + "learning_rate": 3.6734693877551024e-06, + "loss": 1.8469, + "step": 326 + }, + { + "epoch": 0.8247162673392182, + "grad_norm": 26.681806564331055, + "learning_rate": 3.6224489795918373e-06, + "loss": 1.9433, + "step": 327 + }, + { + "epoch": 0.8272383354350568, + "grad_norm": 27.62560272216797, + "learning_rate": 3.5714285714285718e-06, + "loss": 1.8022, + "step": 328 + }, + { + "epoch": 0.8297604035308953, + "grad_norm": 31.047653198242188, + "learning_rate": 3.5204081632653062e-06, + "loss": 1.9952, + "step": 329 + }, + { + "epoch": 0.832282471626734, + "grad_norm": 22.605276107788086, + "learning_rate": 3.469387755102041e-06, + "loss": 1.8992, + "step": 330 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4816061255663616.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-330/training_args.bin b/checkpoints/checkpoint-330/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..866f8d7e39db21daf30a11de7c14a3ccb8f2e9aa --- /dev/null +++ b/checkpoints/checkpoint-330/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:591380c2089627b1aa728bfd52abdb50afeaafc7a7f48353dede4e443fc370d0 +size 5969 diff --git a/checkpoints/checkpoint-340/README.md b/checkpoints/checkpoint-340/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b974a935220f493e52a3da346206a652479b4735 --- /dev/null +++ b/checkpoints/checkpoint-340/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.2-1B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-340/adapter_config.json b/checkpoints/checkpoint-340/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a0d31afa2216e2d6d2237fc15eef0c8c03ca674 --- /dev/null +++ b/checkpoints/checkpoint-340/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.2-1B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-340/adapter_model.safetensors b/checkpoints/checkpoint-340/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a788234fef050a030f1cabbb251dad1c4667f80a --- /dev/null +++ b/checkpoints/checkpoint-340/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66cc339a91863071208aee381d1d0063636de0dfc31b440898233a40f7c0bc68 +size 41248 diff --git a/checkpoints/checkpoint-340/chat_template.jinja b/checkpoints/checkpoint-340/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..1bad6a0f648dccdbec523ca79ba90fbcfc806af0 --- /dev/null +++ b/checkpoints/checkpoint-340/chat_template.jinja @@ -0,0 +1,93 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- if strftime_now is defined %} + {%- set date_string = strftime_now("%d %b %Y") %} + {%- else %} + {%- set date_string = "26 Jul 2024" %} + {%- endif %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {{- "<|eot_id|>" }} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-340/optimizer.pt b/checkpoints/checkpoint-340/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..65ba83b753b3eea7472ec4e81467438d4aebd57d --- /dev/null +++ b/checkpoints/checkpoint-340/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8dfa8c124a61ebd03de8cecc77a77bcfcb15745e18b8ed142efd4d6e3e4aeadc +size 38953 diff --git a/checkpoints/checkpoint-340/rng_state.pth b/checkpoints/checkpoint-340/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c9d202f3cabb995444284e316dca48902b67f89a --- /dev/null +++ b/checkpoints/checkpoint-340/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f646d2c44059721e44eb93fd2af0841401f183d6ff9b2e067649f1f58cce0f09 +size 14581 diff --git a/checkpoints/checkpoint-340/scheduler.pt b/checkpoints/checkpoint-340/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..df54d867c897406ffa4a056858514fed64c40f4b --- /dev/null +++ b/checkpoints/checkpoint-340/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:900070de68007cc3c6ee56c9d31e466e40c13bf21dd4b01749ac67187add5810 +size 1465 diff --git a/checkpoints/checkpoint-340/special_tokens_map.json b/checkpoints/checkpoint-340/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-340/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-340/tokenizer.json b/checkpoints/checkpoint-340/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-340/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-340/tokenizer_config.json b/checkpoints/checkpoint-340/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-340/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-340/trainer_state.json b/checkpoints/checkpoint-340/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b1af56a0524c6254f956db2774897529976188c7 --- /dev/null +++ b/checkpoints/checkpoint-340/trainer_state.json @@ -0,0 +1,2438 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.8575031525851198, + "eval_steps": 100, + "global_step": 340, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 25.350475311279297, + "learning_rate": 0.0, + "loss": 2.5568, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 24.538068771362305, + "learning_rate": 4.000000000000001e-06, + "loss": 2.7748, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 23.780784606933594, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5911, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 24.780380249023438, + "learning_rate": 1.2e-05, + "loss": 2.8427, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 22.699949264526367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.709, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 22.106008529663086, + "learning_rate": 2e-05, + "loss": 2.5854, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 22.497045516967773, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.5427, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 27.103275299072266, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.6599, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 21.081985473632812, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.5145, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 25.964981079101562, + "learning_rate": 1.979591836734694e-05, + "loss": 2.4247, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 25.353195190429688, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.5092, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 18.94191551208496, + "learning_rate": 1.969387755102041e-05, + "loss": 2.4335, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 23.60140037536621, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.544, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 24.298965454101562, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.4987, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.745506286621094, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.4985, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 22.54330062866211, + "learning_rate": 1.948979591836735e-05, + "loss": 2.6892, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 21.46229362487793, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.3998, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 20.54530143737793, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.4244, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 18.8839111328125, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.3911, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.924652099609375, + "learning_rate": 1.928571428571429e-05, + "loss": 2.3588, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.996627807617188, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.3727, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 18.584613800048828, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2974, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 14.309200286865234, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.4843, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.074164390563965, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.4043, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 13.610542297363281, + "learning_rate": 1.903061224489796e-05, + "loss": 2.3762, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 15.666613578796387, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.3249, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 14.475164413452148, + "learning_rate": 1.892857142857143e-05, + "loss": 2.3317, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 16.231687545776367, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.5064, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 16.8968563079834, + "learning_rate": 1.88265306122449e-05, + "loss": 2.3932, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 17.74305534362793, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.3329, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 16.41620445251465, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.3982, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 17.965959548950195, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.4227, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 19.92589569091797, + "learning_rate": 1.862244897959184e-05, + "loss": 2.5255, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 20.62932586669922, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.1816, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 18.360614776611328, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.2827, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 19.199546813964844, + "learning_rate": 1.8469387755102043e-05, + "loss": 2.1498, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 22.727521896362305, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.3811, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 19.80649757385254, + "learning_rate": 1.836734693877551e-05, + "loss": 2.2342, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.24563217163086, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.2287, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 25.384042739868164, + "learning_rate": 1.826530612244898e-05, + "loss": 2.1259, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 23.417089462280273, + "learning_rate": 1.8214285714285715e-05, + "loss": 2.0858, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 27.639497756958008, + "learning_rate": 1.816326530612245e-05, + "loss": 2.2243, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 27.390850067138672, + "learning_rate": 1.8112244897959187e-05, + "loss": 2.1314, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 27.956937789916992, + "learning_rate": 1.806122448979592e-05, + "loss": 2.1755, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 32.09632873535156, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.2365, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 33.84647750854492, + "learning_rate": 1.795918367346939e-05, + "loss": 2.1671, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 32.027130126953125, + "learning_rate": 1.7908163265306123e-05, + "loss": 2.09, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 35.423587799072266, + "learning_rate": 1.785714285714286e-05, + "loss": 2.2479, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 31.041240692138672, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9687, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 28.790103912353516, + "learning_rate": 1.7755102040816327e-05, + "loss": 2.1428, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.089313507080078, + "learning_rate": 1.7704081632653062e-05, + "loss": 2.0673, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 26.493867874145508, + "learning_rate": 1.7653061224489798e-05, + "loss": 2.0814, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 19.993173599243164, + "learning_rate": 1.760204081632653e-05, + "loss": 2.005, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 21.89765167236328, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.2262, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 23.22844123840332, + "learning_rate": 1.7500000000000002e-05, + "loss": 2.0208, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 15.864526748657227, + "learning_rate": 1.7448979591836738e-05, + "loss": 2.0153, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 21.451187133789062, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.1386, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 18.089811325073242, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.9517, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 24.029157638549805, + "learning_rate": 1.729591836734694e-05, + "loss": 1.9719, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 18.722776412963867, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0623, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 20.211933135986328, + "learning_rate": 1.719387755102041e-05, + "loss": 2.0081, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 17.61188507080078, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.8484, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 20.118955612182617, + "learning_rate": 1.7091836734693878e-05, + "loss": 2.0799, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 17.271841049194336, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.9832, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 19.521392822265625, + "learning_rate": 1.698979591836735e-05, + "loss": 1.9129, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 22.660900115966797, + "learning_rate": 1.6938775510204085e-05, + "loss": 2.118, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 17.332427978515625, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.9632, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 22.42765998840332, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.954, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 23.6208553314209, + "learning_rate": 1.678571428571429e-05, + "loss": 1.9917, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 19.78505516052246, + "learning_rate": 1.673469387755102e-05, + "loss": 1.7964, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 19.453041076660156, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.9587, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 24.731407165527344, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.9945, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 20.977611541748047, + "learning_rate": 1.6581632653061225e-05, + "loss": 2.0617, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 22.959585189819336, + "learning_rate": 1.653061224489796e-05, + "loss": 1.98, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 21.952653884887695, + "learning_rate": 1.6479591836734696e-05, + "loss": 2.1094, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 22.320383071899414, + "learning_rate": 1.642857142857143e-05, + "loss": 1.8418, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 24.375411987304688, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.8428, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 19.64323616027832, + "learning_rate": 1.63265306122449e-05, + "loss": 1.9194, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 22.459064483642578, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.6649, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 36.789764404296875, + "learning_rate": 1.6224489795918368e-05, + "loss": 2.0131, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 22.109119415283203, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.9603, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 19.196834564208984, + "learning_rate": 1.612244897959184e-05, + "loss": 2.0538, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 26.870800018310547, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.9168, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 35.190696716308594, + "learning_rate": 1.6020408163265308e-05, + "loss": 2.0149, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 19.963472366333008, + "learning_rate": 1.596938775510204e-05, + "loss": 1.7871, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 20.292407989501953, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.944, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 20.55329132080078, + "learning_rate": 1.586734693877551e-05, + "loss": 2.0175, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 17.27350616455078, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.9308, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 22.471134185791016, + "learning_rate": 1.576530612244898e-05, + "loss": 1.9221, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 25.098316192626953, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.9359, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 25.125213623046875, + "learning_rate": 1.566326530612245e-05, + "loss": 2.0087, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 20.038599014282227, + "learning_rate": 1.5612244897959187e-05, + "loss": 2.0939, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 19.016841888427734, + "learning_rate": 1.556122448979592e-05, + "loss": 2.0183, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 21.97820472717285, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.8239, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 25.578901290893555, + "learning_rate": 1.545918367346939e-05, + "loss": 1.9388, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 23.74614143371582, + "learning_rate": 1.5408163265306123e-05, + "loss": 2.0492, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 22.203304290771484, + "learning_rate": 1.535714285714286e-05, + "loss": 1.941, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 21.39324188232422, + "learning_rate": 1.530612244897959e-05, + "loss": 1.9042, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 18.99315643310547, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.88, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 24.22341537475586, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.8147, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.8966256380081177, + "eval_runtime": 6.9787, + "eval_samples_per_second": 101.022, + "eval_steps_per_second": 50.583, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 18.296152114868164, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.8605, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 26.404766082763672, + "learning_rate": 1.510204081632653e-05, + "loss": 2.1195, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 19.187122344970703, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.9284, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 20.79934310913086, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.725, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 23.833288192749023, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.9215, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 22.301727294921875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.8728, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 23.685596466064453, + "learning_rate": 1.4846938775510204e-05, + "loss": 2.0482, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 18.969186782836914, + "learning_rate": 1.479591836734694e-05, + "loss": 1.8724, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 23.994483947753906, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.9542, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 16.84621238708496, + "learning_rate": 1.469387755102041e-05, + "loss": 1.9703, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 23.411087036132812, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.9836, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 29.55487632751465, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.9124, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 32.28921127319336, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.8566, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 24.007558822631836, + "learning_rate": 1.448979591836735e-05, + "loss": 1.8296, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 26.753524780273438, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.7181, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 22.49270248413086, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.8741, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 28.006656646728516, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.9151, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 17.606775283813477, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.9654, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 29.94802474975586, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.8849, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 28.27743148803711, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.8006, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 19.11652183532715, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.8539, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 24.255807876586914, + "learning_rate": 1.4081632653061225e-05, + "loss": 2.0162, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 22.508352279663086, + "learning_rate": 1.403061224489796e-05, + "loss": 1.858, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 27.028772354125977, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.8175, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 22.697704315185547, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.9789, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 31.604068756103516, + "learning_rate": 1.3877551020408165e-05, + "loss": 2.0039, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 27.71053695678711, + "learning_rate": 1.38265306122449e-05, + "loss": 1.9867, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 17.37586784362793, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.6931, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 20.28536605834961, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.9199, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 29.4377384185791, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.9322, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 17.703046798706055, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.8842, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 30.14008140563965, + "learning_rate": 1.3571428571428574e-05, + "loss": 2.1228, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 29.657262802124023, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.8929, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 18.243854522705078, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.8811, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 33.22247314453125, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.9814, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 26.413856506347656, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.9329, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 20.56089210510254, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.8944, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 19.480737686157227, + "learning_rate": 1.326530612244898e-05, + "loss": 1.9221, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 22.788074493408203, + "learning_rate": 1.3214285714285716e-05, + "loss": 2.0445, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 20.722291946411133, + "learning_rate": 1.316326530612245e-05, + "loss": 1.9998, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 25.190189361572266, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.8076, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 23.203886032104492, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.7821, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 25.32374382019043, + "learning_rate": 1.3010204081632653e-05, + "loss": 2.0356, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 28.798864364624023, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.8202, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 24.93810272216797, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.9237, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 36.78353500366211, + "learning_rate": 1.2857142857142859e-05, + "loss": 2.0019, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 28.510663986206055, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.9268, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 38.19087219238281, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.9366, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 20.796728134155273, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.8731, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 23.036758422851562, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.8835, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 27.058195114135742, + "learning_rate": 1.260204081632653e-05, + "loss": 1.8013, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 25.390460968017578, + "learning_rate": 1.2551020408163267e-05, + "loss": 2.0623, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 27.993654251098633, + "learning_rate": 1.25e-05, + "loss": 1.6895, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 24.15807342529297, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.9799, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 24.369815826416016, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.9687, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 24.572988510131836, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.8607, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 20.491390228271484, + "learning_rate": 1.229591836734694e-05, + "loss": 2.0677, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 25.128101348876953, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.9233, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 18.843276977539062, + "learning_rate": 1.219387755102041e-05, + "loss": 1.781, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 24.99994659423828, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.962, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 20.679218292236328, + "learning_rate": 1.2091836734693878e-05, + "loss": 2.0055, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 26.00550651550293, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.9761, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 33.80900192260742, + "learning_rate": 1.1989795918367348e-05, + "loss": 2.0502, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 25.639009475708008, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.9088, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 17.48627471923828, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.9359, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 23.16074562072754, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.8647, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 25.39946174621582, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.8523, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 25.8050537109375, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.8403, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 20.019033432006836, + "learning_rate": 1.1683673469387755e-05, + "loss": 2.0023, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 26.194847106933594, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.9429, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 21.064212799072266, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.8302, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 21.876129150390625, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.8881, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 33.61103439331055, + "learning_rate": 1.1479591836734697e-05, + "loss": 2.0497, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 27.204744338989258, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.8431, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 21.605751037597656, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.9149, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 30.307472229003906, + "learning_rate": 1.1326530612244899e-05, + "loss": 2.0313, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 23.69244384765625, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.8676, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 25.619901657104492, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.7905, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 28.0296573638916, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.8567, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 36.4359130859375, + "learning_rate": 1.1122448979591838e-05, + "loss": 2.115, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 26.91726303100586, + "learning_rate": 1.1071428571428572e-05, + "loss": 2.0642, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 23.085880279541016, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.9109, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 27.870641708374023, + "learning_rate": 1.096938775510204e-05, + "loss": 1.8999, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 32.0672607421875, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.904, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 28.879365921020508, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.7159, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 27.592771530151367, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.8561, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 27.412763595581055, + "learning_rate": 1.076530612244898e-05, + "loss": 1.9282, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 30.12356185913086, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.8726, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 39.9027214050293, + "learning_rate": 1.066326530612245e-05, + "loss": 1.7551, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 30.483945846557617, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.7643, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 26.00415802001953, + "learning_rate": 1.0561224489795918e-05, + "loss": 2.0552, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 23.03282356262207, + "learning_rate": 1.0510204081632654e-05, + "loss": 2.0052, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 33.653221130371094, + "learning_rate": 1.045918367346939e-05, + "loss": 1.7367, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 39.59351348876953, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.9726, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.77714920043945, + "learning_rate": 1.0357142857142859e-05, + "loss": 2.0906, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 33.194549560546875, + "learning_rate": 1.0306122448979591e-05, + "loss": 2.0742, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 25.10793685913086, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.9204, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 40.048404693603516, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.7775, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 26.085933685302734, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.9459, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 18.375, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.9536, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.8626214265823364, + "eval_runtime": 6.6508, + "eval_samples_per_second": 106.002, + "eval_steps_per_second": 53.076, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 33.858341217041016, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.8191, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 22.895992279052734, + "learning_rate": 1e-05, + "loss": 2.0596, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 30.55072593688965, + "learning_rate": 9.948979591836737e-06, + "loss": 1.8904, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 26.542705535888672, + "learning_rate": 9.89795918367347e-06, + "loss": 1.9683, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 39.81034851074219, + "learning_rate": 9.846938775510205e-06, + "loss": 1.9726, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 22.0065860748291, + "learning_rate": 9.795918367346939e-06, + "loss": 2.0575, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 19.012041091918945, + "learning_rate": 9.744897959183674e-06, + "loss": 1.8431, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 39.699974060058594, + "learning_rate": 9.693877551020408e-06, + "loss": 1.8231, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 21.391319274902344, + "learning_rate": 9.642857142857144e-06, + "loss": 1.7939, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 25.8063907623291, + "learning_rate": 9.591836734693878e-06, + "loss": 2.0366, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 20.598569869995117, + "learning_rate": 9.540816326530612e-06, + "loss": 1.8323, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 29.391401290893555, + "learning_rate": 9.489795918367348e-06, + "loss": 2.0052, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 24.39499855041504, + "learning_rate": 9.438775510204082e-06, + "loss": 1.8461, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 24.16887092590332, + "learning_rate": 9.387755102040818e-06, + "loss": 1.9404, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 24.577871322631836, + "learning_rate": 9.336734693877552e-06, + "loss": 1.9202, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 26.117361068725586, + "learning_rate": 9.285714285714288e-06, + "loss": 1.921, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 22.586837768554688, + "learning_rate": 9.234693877551022e-06, + "loss": 1.9692, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 18.438722610473633, + "learning_rate": 9.183673469387756e-06, + "loss": 1.9496, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 22.94545555114746, + "learning_rate": 9.13265306122449e-06, + "loss": 1.991, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 28.664562225341797, + "learning_rate": 9.081632653061225e-06, + "loss": 1.8352, + "step": 220 + }, + { + "epoch": 0.5573770491803278, + "grad_norm": 25.63576316833496, + "learning_rate": 9.03061224489796e-06, + "loss": 1.9399, + "step": 221 + }, + { + "epoch": 0.5598991172761665, + "grad_norm": 21.650251388549805, + "learning_rate": 8.979591836734695e-06, + "loss": 1.9565, + "step": 222 + }, + { + "epoch": 0.562421185372005, + "grad_norm": 29.605735778808594, + "learning_rate": 8.92857142857143e-06, + "loss": 1.729, + "step": 223 + }, + { + "epoch": 0.5649432534678437, + "grad_norm": 23.98230743408203, + "learning_rate": 8.877551020408163e-06, + "loss": 1.9399, + "step": 224 + }, + { + "epoch": 0.5674653215636822, + "grad_norm": 20.37510108947754, + "learning_rate": 8.826530612244899e-06, + "loss": 1.7322, + "step": 225 + }, + { + "epoch": 0.5699873896595208, + "grad_norm": 25.876188278198242, + "learning_rate": 8.775510204081633e-06, + "loss": 1.9444, + "step": 226 + }, + { + "epoch": 0.5725094577553594, + "grad_norm": 32.07249069213867, + "learning_rate": 8.724489795918369e-06, + "loss": 1.9364, + "step": 227 + }, + { + "epoch": 0.575031525851198, + "grad_norm": 28.014524459838867, + "learning_rate": 8.673469387755103e-06, + "loss": 1.757, + "step": 228 + }, + { + "epoch": 0.5775535939470365, + "grad_norm": 30.82647132873535, + "learning_rate": 8.622448979591837e-06, + "loss": 1.9067, + "step": 229 + }, + { + "epoch": 0.5800756620428752, + "grad_norm": 30.651660919189453, + "learning_rate": 8.571428571428571e-06, + "loss": 1.9906, + "step": 230 + }, + { + "epoch": 0.5825977301387137, + "grad_norm": 25.239904403686523, + "learning_rate": 8.520408163265307e-06, + "loss": 1.8914, + "step": 231 + }, + { + "epoch": 0.5851197982345523, + "grad_norm": 21.33747673034668, + "learning_rate": 8.469387755102042e-06, + "loss": 1.9999, + "step": 232 + }, + { + "epoch": 0.587641866330391, + "grad_norm": 25.255064010620117, + "learning_rate": 8.418367346938776e-06, + "loss": 1.8941, + "step": 233 + }, + { + "epoch": 0.5901639344262295, + "grad_norm": 24.443973541259766, + "learning_rate": 8.36734693877551e-06, + "loss": 1.7679, + "step": 234 + }, + { + "epoch": 0.592686002522068, + "grad_norm": 25.473894119262695, + "learning_rate": 8.316326530612246e-06, + "loss": 1.7876, + "step": 235 + }, + { + "epoch": 0.5952080706179067, + "grad_norm": 26.28467559814453, + "learning_rate": 8.26530612244898e-06, + "loss": 1.6761, + "step": 236 + }, + { + "epoch": 0.5977301387137453, + "grad_norm": 24.488052368164062, + "learning_rate": 8.214285714285714e-06, + "loss": 2.0022, + "step": 237 + }, + { + "epoch": 0.6002522068095839, + "grad_norm": 30.074064254760742, + "learning_rate": 8.16326530612245e-06, + "loss": 1.7747, + "step": 238 + }, + { + "epoch": 0.6027742749054225, + "grad_norm": 23.73440170288086, + "learning_rate": 8.112244897959184e-06, + "loss": 1.8468, + "step": 239 + }, + { + "epoch": 0.605296343001261, + "grad_norm": 22.338869094848633, + "learning_rate": 8.06122448979592e-06, + "loss": 1.8611, + "step": 240 + }, + { + "epoch": 0.6078184110970997, + "grad_norm": 24.844266891479492, + "learning_rate": 8.010204081632654e-06, + "loss": 1.9285, + "step": 241 + }, + { + "epoch": 0.6103404791929382, + "grad_norm": 29.65668487548828, + "learning_rate": 7.959183673469388e-06, + "loss": 1.935, + "step": 242 + }, + { + "epoch": 0.6128625472887768, + "grad_norm": 26.01723289489746, + "learning_rate": 7.908163265306124e-06, + "loss": 1.7587, + "step": 243 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 27.04817771911621, + "learning_rate": 7.857142857142858e-06, + "loss": 1.8878, + "step": 244 + }, + { + "epoch": 0.617906683480454, + "grad_norm": 36.23786163330078, + "learning_rate": 7.806122448979593e-06, + "loss": 1.992, + "step": 245 + }, + { + "epoch": 0.6204287515762925, + "grad_norm": 19.283294677734375, + "learning_rate": 7.755102040816327e-06, + "loss": 1.8066, + "step": 246 + }, + { + "epoch": 0.6229508196721312, + "grad_norm": 24.24143409729004, + "learning_rate": 7.704081632653061e-06, + "loss": 1.8899, + "step": 247 + }, + { + "epoch": 0.6254728877679697, + "grad_norm": 25.59832763671875, + "learning_rate": 7.653061224489796e-06, + "loss": 1.9601, + "step": 248 + }, + { + "epoch": 0.6279949558638083, + "grad_norm": 27.195640563964844, + "learning_rate": 7.602040816326531e-06, + "loss": 1.9561, + "step": 249 + }, + { + "epoch": 0.6305170239596469, + "grad_norm": 27.854570388793945, + "learning_rate": 7.551020408163265e-06, + "loss": 1.8781, + "step": 250 + }, + { + "epoch": 0.6330390920554855, + "grad_norm": 25.715761184692383, + "learning_rate": 7.500000000000001e-06, + "loss": 1.8542, + "step": 251 + }, + { + "epoch": 0.6355611601513241, + "grad_norm": 22.562984466552734, + "learning_rate": 7.448979591836736e-06, + "loss": 1.7681, + "step": 252 + }, + { + "epoch": 0.6380832282471627, + "grad_norm": 20.540348052978516, + "learning_rate": 7.39795918367347e-06, + "loss": 1.9617, + "step": 253 + }, + { + "epoch": 0.6406052963430012, + "grad_norm": 24.610937118530273, + "learning_rate": 7.346938775510205e-06, + "loss": 1.9694, + "step": 254 + }, + { + "epoch": 0.6431273644388399, + "grad_norm": 27.93538475036621, + "learning_rate": 7.295918367346939e-06, + "loss": 1.8858, + "step": 255 + }, + { + "epoch": 0.6456494325346784, + "grad_norm": 31.466445922851562, + "learning_rate": 7.244897959183675e-06, + "loss": 1.8252, + "step": 256 + }, + { + "epoch": 0.648171500630517, + "grad_norm": 26.276226043701172, + "learning_rate": 7.193877551020409e-06, + "loss": 1.8865, + "step": 257 + }, + { + "epoch": 0.6506935687263556, + "grad_norm": 22.52095603942871, + "learning_rate": 7.1428571428571436e-06, + "loss": 1.7649, + "step": 258 + }, + { + "epoch": 0.6532156368221942, + "grad_norm": 20.15144157409668, + "learning_rate": 7.091836734693878e-06, + "loss": 2.0158, + "step": 259 + }, + { + "epoch": 0.6557377049180327, + "grad_norm": 26.405349731445312, + "learning_rate": 7.0408163265306125e-06, + "loss": 1.8932, + "step": 260 + }, + { + "epoch": 0.6582597730138714, + "grad_norm": 32.94384765625, + "learning_rate": 6.989795918367348e-06, + "loss": 1.7795, + "step": 261 + }, + { + "epoch": 0.6607818411097099, + "grad_norm": 23.109092712402344, + "learning_rate": 6.938775510204082e-06, + "loss": 1.8383, + "step": 262 + }, + { + "epoch": 0.6633039092055486, + "grad_norm": 21.75737190246582, + "learning_rate": 6.887755102040817e-06, + "loss": 1.8727, + "step": 263 + }, + { + "epoch": 0.6658259773013872, + "grad_norm": 22.96916389465332, + "learning_rate": 6.836734693877551e-06, + "loss": 1.8544, + "step": 264 + }, + { + "epoch": 0.6683480453972257, + "grad_norm": 25.62445640563965, + "learning_rate": 6.785714285714287e-06, + "loss": 1.7503, + "step": 265 + }, + { + "epoch": 0.6708701134930644, + "grad_norm": 25.430530548095703, + "learning_rate": 6.734693877551021e-06, + "loss": 1.7938, + "step": 266 + }, + { + "epoch": 0.6733921815889029, + "grad_norm": 26.462881088256836, + "learning_rate": 6.683673469387756e-06, + "loss": 1.8284, + "step": 267 + }, + { + "epoch": 0.6759142496847415, + "grad_norm": 31.45004653930664, + "learning_rate": 6.63265306122449e-06, + "loss": 2.0328, + "step": 268 + }, + { + "epoch": 0.6784363177805801, + "grad_norm": 30.525737762451172, + "learning_rate": 6.581632653061225e-06, + "loss": 1.8192, + "step": 269 + }, + { + "epoch": 0.6809583858764187, + "grad_norm": 25.705707550048828, + "learning_rate": 6.530612244897959e-06, + "loss": 1.8533, + "step": 270 + }, + { + "epoch": 0.6834804539722572, + "grad_norm": 39.90187454223633, + "learning_rate": 6.4795918367346946e-06, + "loss": 1.9483, + "step": 271 + }, + { + "epoch": 0.6860025220680959, + "grad_norm": 28.0180721282959, + "learning_rate": 6.4285714285714295e-06, + "loss": 1.8132, + "step": 272 + }, + { + "epoch": 0.6885245901639344, + "grad_norm": 34.821372985839844, + "learning_rate": 6.3775510204081635e-06, + "loss": 1.9599, + "step": 273 + }, + { + "epoch": 0.691046658259773, + "grad_norm": 24.018394470214844, + "learning_rate": 6.326530612244899e-06, + "loss": 1.9248, + "step": 274 + }, + { + "epoch": 0.6935687263556116, + "grad_norm": 24.074344635009766, + "learning_rate": 6.275510204081633e-06, + "loss": 2.0148, + "step": 275 + }, + { + "epoch": 0.6960907944514502, + "grad_norm": 31.1939754486084, + "learning_rate": 6.224489795918368e-06, + "loss": 1.8959, + "step": 276 + }, + { + "epoch": 0.6986128625472888, + "grad_norm": 25.481502532958984, + "learning_rate": 6.173469387755102e-06, + "loss": 1.9832, + "step": 277 + }, + { + "epoch": 0.7011349306431274, + "grad_norm": 29.6664981842041, + "learning_rate": 6.122448979591837e-06, + "loss": 1.9222, + "step": 278 + }, + { + "epoch": 0.7036569987389659, + "grad_norm": 26.30698585510254, + "learning_rate": 6.071428571428571e-06, + "loss": 1.9897, + "step": 279 + }, + { + "epoch": 0.7061790668348046, + "grad_norm": 31.827558517456055, + "learning_rate": 6.020408163265307e-06, + "loss": 1.8615, + "step": 280 + }, + { + "epoch": 0.7087011349306431, + "grad_norm": 24.80223846435547, + "learning_rate": 5.969387755102042e-06, + "loss": 1.9579, + "step": 281 + }, + { + "epoch": 0.7112232030264817, + "grad_norm": 36.134700775146484, + "learning_rate": 5.918367346938776e-06, + "loss": 1.723, + "step": 282 + }, + { + "epoch": 0.7137452711223203, + "grad_norm": 30.388233184814453, + "learning_rate": 5.867346938775511e-06, + "loss": 1.9736, + "step": 283 + }, + { + "epoch": 0.7162673392181589, + "grad_norm": 32.231563568115234, + "learning_rate": 5.816326530612246e-06, + "loss": 1.9228, + "step": 284 + }, + { + "epoch": 0.7187894073139974, + "grad_norm": 38.05869674682617, + "learning_rate": 5.7653061224489805e-06, + "loss": 1.9159, + "step": 285 + }, + { + "epoch": 0.7213114754098361, + "grad_norm": 27.256147384643555, + "learning_rate": 5.7142857142857145e-06, + "loss": 1.8072, + "step": 286 + }, + { + "epoch": 0.7238335435056746, + "grad_norm": 25.67181396484375, + "learning_rate": 5.663265306122449e-06, + "loss": 1.8633, + "step": 287 + }, + { + "epoch": 0.7263556116015133, + "grad_norm": 31.8681697845459, + "learning_rate": 5.6122448979591834e-06, + "loss": 1.9822, + "step": 288 + }, + { + "epoch": 0.7288776796973518, + "grad_norm": 32.85325241088867, + "learning_rate": 5.561224489795919e-06, + "loss": 1.8166, + "step": 289 + }, + { + "epoch": 0.7313997477931904, + "grad_norm": 35.64312744140625, + "learning_rate": 5.510204081632653e-06, + "loss": 1.7166, + "step": 290 + }, + { + "epoch": 0.733921815889029, + "grad_norm": 24.276235580444336, + "learning_rate": 5.459183673469388e-06, + "loss": 1.7593, + "step": 291 + }, + { + "epoch": 0.7364438839848676, + "grad_norm": 29.371950149536133, + "learning_rate": 5.408163265306123e-06, + "loss": 1.8124, + "step": 292 + }, + { + "epoch": 0.7389659520807061, + "grad_norm": 23.76220703125, + "learning_rate": 5.357142857142857e-06, + "loss": 1.7775, + "step": 293 + }, + { + "epoch": 0.7414880201765448, + "grad_norm": 37.103050231933594, + "learning_rate": 5.306122448979593e-06, + "loss": 1.9253, + "step": 294 + }, + { + "epoch": 0.7440100882723834, + "grad_norm": 20.0811767578125, + "learning_rate": 5.255102040816327e-06, + "loss": 1.8711, + "step": 295 + }, + { + "epoch": 0.7465321563682219, + "grad_norm": 35.33123016357422, + "learning_rate": 5.204081632653062e-06, + "loss": 1.8764, + "step": 296 + }, + { + "epoch": 0.7490542244640606, + "grad_norm": 31.880672454833984, + "learning_rate": 5.153061224489796e-06, + "loss": 1.8929, + "step": 297 + }, + { + "epoch": 0.7515762925598991, + "grad_norm": 21.682334899902344, + "learning_rate": 5.1020408163265315e-06, + "loss": 2.0377, + "step": 298 + }, + { + "epoch": 0.7540983606557377, + "grad_norm": 34.68608474731445, + "learning_rate": 5.0510204081632655e-06, + "loss": 1.9341, + "step": 299 + }, + { + "epoch": 0.7566204287515763, + "grad_norm": 25.59632110595703, + "learning_rate": 5e-06, + "loss": 1.8264, + "step": 300 + }, + { + "epoch": 0.7566204287515763, + "eval_loss": 1.8502724170684814, + "eval_runtime": 6.6208, + "eval_samples_per_second": 106.483, + "eval_steps_per_second": 53.317, + "step": 300 + }, + { + "epoch": 0.7591424968474149, + "grad_norm": 33.780616760253906, + "learning_rate": 4.948979591836735e-06, + "loss": 1.8315, + "step": 301 + }, + { + "epoch": 0.7616645649432535, + "grad_norm": 23.005069732666016, + "learning_rate": 4.897959183673469e-06, + "loss": 1.8434, + "step": 302 + }, + { + "epoch": 0.7641866330390921, + "grad_norm": 27.338787078857422, + "learning_rate": 4.846938775510204e-06, + "loss": 1.9102, + "step": 303 + }, + { + "epoch": 0.7667087011349306, + "grad_norm": 26.87493133544922, + "learning_rate": 4.795918367346939e-06, + "loss": 1.8408, + "step": 304 + }, + { + "epoch": 0.7692307692307693, + "grad_norm": 33.59117126464844, + "learning_rate": 4.744897959183674e-06, + "loss": 1.8633, + "step": 305 + }, + { + "epoch": 0.7717528373266078, + "grad_norm": 38.98092269897461, + "learning_rate": 4.693877551020409e-06, + "loss": 1.8187, + "step": 306 + }, + { + "epoch": 0.7742749054224464, + "grad_norm": 28.7203369140625, + "learning_rate": 4.642857142857144e-06, + "loss": 1.8425, + "step": 307 + }, + { + "epoch": 0.776796973518285, + "grad_norm": 30.91414451599121, + "learning_rate": 4.591836734693878e-06, + "loss": 1.8526, + "step": 308 + }, + { + "epoch": 0.7793190416141236, + "grad_norm": 29.04154396057129, + "learning_rate": 4.540816326530613e-06, + "loss": 1.8913, + "step": 309 + }, + { + "epoch": 0.7818411097099621, + "grad_norm": 29.638099670410156, + "learning_rate": 4.489795918367348e-06, + "loss": 1.8736, + "step": 310 + }, + { + "epoch": 0.7843631778058008, + "grad_norm": 21.630523681640625, + "learning_rate": 4.438775510204082e-06, + "loss": 1.7407, + "step": 311 + }, + { + "epoch": 0.7868852459016393, + "grad_norm": 28.39365577697754, + "learning_rate": 4.3877551020408165e-06, + "loss": 1.7503, + "step": 312 + }, + { + "epoch": 0.7894073139974779, + "grad_norm": 30.86245346069336, + "learning_rate": 4.336734693877551e-06, + "loss": 1.885, + "step": 313 + }, + { + "epoch": 0.7919293820933165, + "grad_norm": 20.29497718811035, + "learning_rate": 4.2857142857142855e-06, + "loss": 1.8725, + "step": 314 + }, + { + "epoch": 0.7944514501891551, + "grad_norm": 20.04092025756836, + "learning_rate": 4.234693877551021e-06, + "loss": 1.949, + "step": 315 + }, + { + "epoch": 0.7969735182849937, + "grad_norm": 26.88593101501465, + "learning_rate": 4.183673469387755e-06, + "loss": 1.8807, + "step": 316 + }, + { + "epoch": 0.7994955863808323, + "grad_norm": 28.06767463684082, + "learning_rate": 4.13265306122449e-06, + "loss": 1.9135, + "step": 317 + }, + { + "epoch": 0.8020176544766708, + "grad_norm": 35.69569778442383, + "learning_rate": 4.081632653061225e-06, + "loss": 1.8257, + "step": 318 + }, + { + "epoch": 0.8045397225725095, + "grad_norm": 29.278770446777344, + "learning_rate": 4.03061224489796e-06, + "loss": 1.8459, + "step": 319 + }, + { + "epoch": 0.807061790668348, + "grad_norm": 28.043975830078125, + "learning_rate": 3.979591836734694e-06, + "loss": 1.8821, + "step": 320 + }, + { + "epoch": 0.8095838587641866, + "grad_norm": 24.060317993164062, + "learning_rate": 3.928571428571429e-06, + "loss": 1.853, + "step": 321 + }, + { + "epoch": 0.8121059268600253, + "grad_norm": 23.929243087768555, + "learning_rate": 3.877551020408164e-06, + "loss": 1.9249, + "step": 322 + }, + { + "epoch": 0.8146279949558638, + "grad_norm": 37.94782257080078, + "learning_rate": 3.826530612244898e-06, + "loss": 1.9666, + "step": 323 + }, + { + "epoch": 0.8171500630517023, + "grad_norm": 26.99836540222168, + "learning_rate": 3.7755102040816327e-06, + "loss": 1.8464, + "step": 324 + }, + { + "epoch": 0.819672131147541, + "grad_norm": 26.87177085876465, + "learning_rate": 3.724489795918368e-06, + "loss": 1.9178, + "step": 325 + }, + { + "epoch": 0.8221941992433796, + "grad_norm": 36.669212341308594, + "learning_rate": 3.6734693877551024e-06, + "loss": 1.8469, + "step": 326 + }, + { + "epoch": 0.8247162673392182, + "grad_norm": 26.681806564331055, + "learning_rate": 3.6224489795918373e-06, + "loss": 1.9433, + "step": 327 + }, + { + "epoch": 0.8272383354350568, + "grad_norm": 27.62560272216797, + "learning_rate": 3.5714285714285718e-06, + "loss": 1.8022, + "step": 328 + }, + { + "epoch": 0.8297604035308953, + "grad_norm": 31.047653198242188, + "learning_rate": 3.5204081632653062e-06, + "loss": 1.9952, + "step": 329 + }, + { + "epoch": 0.832282471626734, + "grad_norm": 22.605276107788086, + "learning_rate": 3.469387755102041e-06, + "loss": 1.8992, + "step": 330 + }, + { + "epoch": 0.8348045397225725, + "grad_norm": 27.210397720336914, + "learning_rate": 3.4183673469387756e-06, + "loss": 2.0905, + "step": 331 + }, + { + "epoch": 0.8373266078184111, + "grad_norm": 19.93979835510254, + "learning_rate": 3.3673469387755105e-06, + "loss": 1.8109, + "step": 332 + }, + { + "epoch": 0.8398486759142497, + "grad_norm": 28.999895095825195, + "learning_rate": 3.316326530612245e-06, + "loss": 1.8978, + "step": 333 + }, + { + "epoch": 0.8423707440100883, + "grad_norm": 28.620771408081055, + "learning_rate": 3.2653061224489794e-06, + "loss": 2.0414, + "step": 334 + }, + { + "epoch": 0.8448928121059268, + "grad_norm": 28.657215118408203, + "learning_rate": 3.2142857142857147e-06, + "loss": 1.8445, + "step": 335 + }, + { + "epoch": 0.8474148802017655, + "grad_norm": 33.65087127685547, + "learning_rate": 3.1632653061224496e-06, + "loss": 1.8559, + "step": 336 + }, + { + "epoch": 0.849936948297604, + "grad_norm": 28.926881790161133, + "learning_rate": 3.112244897959184e-06, + "loss": 1.9608, + "step": 337 + }, + { + "epoch": 0.8524590163934426, + "grad_norm": 24.56757926940918, + "learning_rate": 3.0612244897959185e-06, + "loss": 1.8948, + "step": 338 + }, + { + "epoch": 0.8549810844892812, + "grad_norm": 26.79437828063965, + "learning_rate": 3.0102040816326534e-06, + "loss": 1.7803, + "step": 339 + }, + { + "epoch": 0.8575031525851198, + "grad_norm": 22.516748428344727, + "learning_rate": 2.959183673469388e-06, + "loss": 1.9039, + "step": 340 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4964533773975552.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-340/training_args.bin b/checkpoints/checkpoint-340/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..866f8d7e39db21daf30a11de7c14a3ccb8f2e9aa --- /dev/null +++ b/checkpoints/checkpoint-340/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:591380c2089627b1aa728bfd52abdb50afeaafc7a7f48353dede4e443fc370d0 +size 5969 diff --git a/checkpoints/checkpoint-350/README.md b/checkpoints/checkpoint-350/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b974a935220f493e52a3da346206a652479b4735 --- /dev/null +++ b/checkpoints/checkpoint-350/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.2-1B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-350/adapter_config.json b/checkpoints/checkpoint-350/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a0d31afa2216e2d6d2237fc15eef0c8c03ca674 --- /dev/null +++ b/checkpoints/checkpoint-350/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.2-1B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-350/adapter_model.safetensors b/checkpoints/checkpoint-350/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a435d18ecbd4c0321a09d7761bfd1a58e524ab2c --- /dev/null +++ b/checkpoints/checkpoint-350/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77e34b31982e76478a69e3c9c195096abbd496a1c67ef74b586fe281952f6532 +size 41248 diff --git a/checkpoints/checkpoint-350/chat_template.jinja b/checkpoints/checkpoint-350/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..1bad6a0f648dccdbec523ca79ba90fbcfc806af0 --- /dev/null +++ b/checkpoints/checkpoint-350/chat_template.jinja @@ -0,0 +1,93 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- if strftime_now is defined %} + {%- set date_string = strftime_now("%d %b %Y") %} + {%- else %} + {%- set date_string = "26 Jul 2024" %} + {%- endif %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {{- "<|eot_id|>" }} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-350/optimizer.pt b/checkpoints/checkpoint-350/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..f24db7a19b489cfc102dd62d230cf0736643a065 --- /dev/null +++ b/checkpoints/checkpoint-350/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0284a5d4227927b495e80e15384919ff9cfcc32ab14ce38acfe357b83e1ee6c +size 38953 diff --git a/checkpoints/checkpoint-350/rng_state.pth b/checkpoints/checkpoint-350/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c9d202f3cabb995444284e316dca48902b67f89a --- /dev/null +++ b/checkpoints/checkpoint-350/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f646d2c44059721e44eb93fd2af0841401f183d6ff9b2e067649f1f58cce0f09 +size 14581 diff --git a/checkpoints/checkpoint-350/scheduler.pt b/checkpoints/checkpoint-350/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..3b01ec7f3cd2d0330399e869d791eccfd68bdcca --- /dev/null +++ b/checkpoints/checkpoint-350/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b4b9d7b95f36b63f7d9de4c3a62923517aba0add71d5be226316ad459e6d2c3 +size 1465 diff --git a/checkpoints/checkpoint-350/special_tokens_map.json b/checkpoints/checkpoint-350/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-350/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-350/tokenizer.json b/checkpoints/checkpoint-350/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-350/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-350/tokenizer_config.json b/checkpoints/checkpoint-350/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-350/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-350/trainer_state.json b/checkpoints/checkpoint-350/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0055d5250f11d8a153ea406fa046fc16afb2ec0f --- /dev/null +++ b/checkpoints/checkpoint-350/trainer_state.json @@ -0,0 +1,2508 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.8827238335435057, + "eval_steps": 100, + "global_step": 350, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 25.350475311279297, + "learning_rate": 0.0, + "loss": 2.5568, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 24.538068771362305, + "learning_rate": 4.000000000000001e-06, + "loss": 2.7748, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 23.780784606933594, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5911, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 24.780380249023438, + "learning_rate": 1.2e-05, + "loss": 2.8427, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 22.699949264526367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.709, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 22.106008529663086, + "learning_rate": 2e-05, + "loss": 2.5854, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 22.497045516967773, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.5427, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 27.103275299072266, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.6599, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 21.081985473632812, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.5145, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 25.964981079101562, + "learning_rate": 1.979591836734694e-05, + "loss": 2.4247, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 25.353195190429688, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.5092, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 18.94191551208496, + "learning_rate": 1.969387755102041e-05, + "loss": 2.4335, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 23.60140037536621, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.544, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 24.298965454101562, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.4987, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.745506286621094, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.4985, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 22.54330062866211, + "learning_rate": 1.948979591836735e-05, + "loss": 2.6892, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 21.46229362487793, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.3998, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 20.54530143737793, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.4244, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 18.8839111328125, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.3911, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.924652099609375, + "learning_rate": 1.928571428571429e-05, + "loss": 2.3588, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.996627807617188, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.3727, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 18.584613800048828, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2974, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 14.309200286865234, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.4843, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.074164390563965, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.4043, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 13.610542297363281, + "learning_rate": 1.903061224489796e-05, + "loss": 2.3762, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 15.666613578796387, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.3249, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 14.475164413452148, + "learning_rate": 1.892857142857143e-05, + "loss": 2.3317, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 16.231687545776367, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.5064, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 16.8968563079834, + "learning_rate": 1.88265306122449e-05, + "loss": 2.3932, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 17.74305534362793, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.3329, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 16.41620445251465, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.3982, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 17.965959548950195, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.4227, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 19.92589569091797, + "learning_rate": 1.862244897959184e-05, + "loss": 2.5255, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 20.62932586669922, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.1816, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 18.360614776611328, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.2827, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 19.199546813964844, + "learning_rate": 1.8469387755102043e-05, + "loss": 2.1498, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 22.727521896362305, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.3811, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 19.80649757385254, + "learning_rate": 1.836734693877551e-05, + "loss": 2.2342, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.24563217163086, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.2287, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 25.384042739868164, + "learning_rate": 1.826530612244898e-05, + "loss": 2.1259, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 23.417089462280273, + "learning_rate": 1.8214285714285715e-05, + "loss": 2.0858, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 27.639497756958008, + "learning_rate": 1.816326530612245e-05, + "loss": 2.2243, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 27.390850067138672, + "learning_rate": 1.8112244897959187e-05, + "loss": 2.1314, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 27.956937789916992, + "learning_rate": 1.806122448979592e-05, + "loss": 2.1755, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 32.09632873535156, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.2365, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 33.84647750854492, + "learning_rate": 1.795918367346939e-05, + "loss": 2.1671, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 32.027130126953125, + "learning_rate": 1.7908163265306123e-05, + "loss": 2.09, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 35.423587799072266, + "learning_rate": 1.785714285714286e-05, + "loss": 2.2479, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 31.041240692138672, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9687, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 28.790103912353516, + "learning_rate": 1.7755102040816327e-05, + "loss": 2.1428, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.089313507080078, + "learning_rate": 1.7704081632653062e-05, + "loss": 2.0673, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 26.493867874145508, + "learning_rate": 1.7653061224489798e-05, + "loss": 2.0814, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 19.993173599243164, + "learning_rate": 1.760204081632653e-05, + "loss": 2.005, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 21.89765167236328, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.2262, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 23.22844123840332, + "learning_rate": 1.7500000000000002e-05, + "loss": 2.0208, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 15.864526748657227, + "learning_rate": 1.7448979591836738e-05, + "loss": 2.0153, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 21.451187133789062, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.1386, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 18.089811325073242, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.9517, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 24.029157638549805, + "learning_rate": 1.729591836734694e-05, + "loss": 1.9719, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 18.722776412963867, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0623, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 20.211933135986328, + "learning_rate": 1.719387755102041e-05, + "loss": 2.0081, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 17.61188507080078, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.8484, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 20.118955612182617, + "learning_rate": 1.7091836734693878e-05, + "loss": 2.0799, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 17.271841049194336, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.9832, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 19.521392822265625, + "learning_rate": 1.698979591836735e-05, + "loss": 1.9129, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 22.660900115966797, + "learning_rate": 1.6938775510204085e-05, + "loss": 2.118, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 17.332427978515625, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.9632, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 22.42765998840332, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.954, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 23.6208553314209, + "learning_rate": 1.678571428571429e-05, + "loss": 1.9917, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 19.78505516052246, + "learning_rate": 1.673469387755102e-05, + "loss": 1.7964, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 19.453041076660156, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.9587, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 24.731407165527344, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.9945, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 20.977611541748047, + "learning_rate": 1.6581632653061225e-05, + "loss": 2.0617, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 22.959585189819336, + "learning_rate": 1.653061224489796e-05, + "loss": 1.98, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 21.952653884887695, + "learning_rate": 1.6479591836734696e-05, + "loss": 2.1094, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 22.320383071899414, + "learning_rate": 1.642857142857143e-05, + "loss": 1.8418, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 24.375411987304688, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.8428, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 19.64323616027832, + "learning_rate": 1.63265306122449e-05, + "loss": 1.9194, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 22.459064483642578, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.6649, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 36.789764404296875, + "learning_rate": 1.6224489795918368e-05, + "loss": 2.0131, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 22.109119415283203, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.9603, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 19.196834564208984, + "learning_rate": 1.612244897959184e-05, + "loss": 2.0538, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 26.870800018310547, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.9168, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 35.190696716308594, + "learning_rate": 1.6020408163265308e-05, + "loss": 2.0149, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 19.963472366333008, + "learning_rate": 1.596938775510204e-05, + "loss": 1.7871, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 20.292407989501953, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.944, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 20.55329132080078, + "learning_rate": 1.586734693877551e-05, + "loss": 2.0175, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 17.27350616455078, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.9308, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 22.471134185791016, + "learning_rate": 1.576530612244898e-05, + "loss": 1.9221, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 25.098316192626953, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.9359, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 25.125213623046875, + "learning_rate": 1.566326530612245e-05, + "loss": 2.0087, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 20.038599014282227, + "learning_rate": 1.5612244897959187e-05, + "loss": 2.0939, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 19.016841888427734, + "learning_rate": 1.556122448979592e-05, + "loss": 2.0183, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 21.97820472717285, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.8239, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 25.578901290893555, + "learning_rate": 1.545918367346939e-05, + "loss": 1.9388, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 23.74614143371582, + "learning_rate": 1.5408163265306123e-05, + "loss": 2.0492, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 22.203304290771484, + "learning_rate": 1.535714285714286e-05, + "loss": 1.941, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 21.39324188232422, + "learning_rate": 1.530612244897959e-05, + "loss": 1.9042, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 18.99315643310547, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.88, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 24.22341537475586, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.8147, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.8966256380081177, + "eval_runtime": 6.9787, + "eval_samples_per_second": 101.022, + "eval_steps_per_second": 50.583, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 18.296152114868164, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.8605, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 26.404766082763672, + "learning_rate": 1.510204081632653e-05, + "loss": 2.1195, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 19.187122344970703, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.9284, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 20.79934310913086, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.725, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 23.833288192749023, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.9215, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 22.301727294921875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.8728, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 23.685596466064453, + "learning_rate": 1.4846938775510204e-05, + "loss": 2.0482, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 18.969186782836914, + "learning_rate": 1.479591836734694e-05, + "loss": 1.8724, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 23.994483947753906, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.9542, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 16.84621238708496, + "learning_rate": 1.469387755102041e-05, + "loss": 1.9703, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 23.411087036132812, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.9836, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 29.55487632751465, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.9124, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 32.28921127319336, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.8566, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 24.007558822631836, + "learning_rate": 1.448979591836735e-05, + "loss": 1.8296, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 26.753524780273438, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.7181, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 22.49270248413086, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.8741, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 28.006656646728516, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.9151, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 17.606775283813477, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.9654, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 29.94802474975586, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.8849, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 28.27743148803711, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.8006, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 19.11652183532715, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.8539, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 24.255807876586914, + "learning_rate": 1.4081632653061225e-05, + "loss": 2.0162, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 22.508352279663086, + "learning_rate": 1.403061224489796e-05, + "loss": 1.858, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 27.028772354125977, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.8175, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 22.697704315185547, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.9789, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 31.604068756103516, + "learning_rate": 1.3877551020408165e-05, + "loss": 2.0039, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 27.71053695678711, + "learning_rate": 1.38265306122449e-05, + "loss": 1.9867, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 17.37586784362793, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.6931, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 20.28536605834961, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.9199, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 29.4377384185791, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.9322, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 17.703046798706055, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.8842, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 30.14008140563965, + "learning_rate": 1.3571428571428574e-05, + "loss": 2.1228, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 29.657262802124023, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.8929, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 18.243854522705078, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.8811, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 33.22247314453125, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.9814, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 26.413856506347656, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.9329, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 20.56089210510254, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.8944, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 19.480737686157227, + "learning_rate": 1.326530612244898e-05, + "loss": 1.9221, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 22.788074493408203, + "learning_rate": 1.3214285714285716e-05, + "loss": 2.0445, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 20.722291946411133, + "learning_rate": 1.316326530612245e-05, + "loss": 1.9998, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 25.190189361572266, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.8076, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 23.203886032104492, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.7821, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 25.32374382019043, + "learning_rate": 1.3010204081632653e-05, + "loss": 2.0356, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 28.798864364624023, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.8202, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 24.93810272216797, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.9237, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 36.78353500366211, + "learning_rate": 1.2857142857142859e-05, + "loss": 2.0019, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 28.510663986206055, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.9268, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 38.19087219238281, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.9366, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 20.796728134155273, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.8731, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 23.036758422851562, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.8835, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 27.058195114135742, + "learning_rate": 1.260204081632653e-05, + "loss": 1.8013, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 25.390460968017578, + "learning_rate": 1.2551020408163267e-05, + "loss": 2.0623, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 27.993654251098633, + "learning_rate": 1.25e-05, + "loss": 1.6895, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 24.15807342529297, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.9799, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 24.369815826416016, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.9687, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 24.572988510131836, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.8607, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 20.491390228271484, + "learning_rate": 1.229591836734694e-05, + "loss": 2.0677, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 25.128101348876953, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.9233, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 18.843276977539062, + "learning_rate": 1.219387755102041e-05, + "loss": 1.781, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 24.99994659423828, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.962, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 20.679218292236328, + "learning_rate": 1.2091836734693878e-05, + "loss": 2.0055, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 26.00550651550293, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.9761, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 33.80900192260742, + "learning_rate": 1.1989795918367348e-05, + "loss": 2.0502, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 25.639009475708008, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.9088, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 17.48627471923828, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.9359, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 23.16074562072754, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.8647, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 25.39946174621582, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.8523, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 25.8050537109375, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.8403, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 20.019033432006836, + "learning_rate": 1.1683673469387755e-05, + "loss": 2.0023, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 26.194847106933594, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.9429, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 21.064212799072266, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.8302, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 21.876129150390625, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.8881, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 33.61103439331055, + "learning_rate": 1.1479591836734697e-05, + "loss": 2.0497, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 27.204744338989258, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.8431, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 21.605751037597656, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.9149, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 30.307472229003906, + "learning_rate": 1.1326530612244899e-05, + "loss": 2.0313, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 23.69244384765625, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.8676, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 25.619901657104492, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.7905, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 28.0296573638916, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.8567, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 36.4359130859375, + "learning_rate": 1.1122448979591838e-05, + "loss": 2.115, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 26.91726303100586, + "learning_rate": 1.1071428571428572e-05, + "loss": 2.0642, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 23.085880279541016, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.9109, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 27.870641708374023, + "learning_rate": 1.096938775510204e-05, + "loss": 1.8999, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 32.0672607421875, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.904, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 28.879365921020508, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.7159, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 27.592771530151367, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.8561, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 27.412763595581055, + "learning_rate": 1.076530612244898e-05, + "loss": 1.9282, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 30.12356185913086, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.8726, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 39.9027214050293, + "learning_rate": 1.066326530612245e-05, + "loss": 1.7551, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 30.483945846557617, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.7643, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 26.00415802001953, + "learning_rate": 1.0561224489795918e-05, + "loss": 2.0552, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 23.03282356262207, + "learning_rate": 1.0510204081632654e-05, + "loss": 2.0052, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 33.653221130371094, + "learning_rate": 1.045918367346939e-05, + "loss": 1.7367, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 39.59351348876953, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.9726, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.77714920043945, + "learning_rate": 1.0357142857142859e-05, + "loss": 2.0906, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 33.194549560546875, + "learning_rate": 1.0306122448979591e-05, + "loss": 2.0742, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 25.10793685913086, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.9204, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 40.048404693603516, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.7775, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 26.085933685302734, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.9459, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 18.375, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.9536, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.8626214265823364, + "eval_runtime": 6.6508, + "eval_samples_per_second": 106.002, + "eval_steps_per_second": 53.076, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 33.858341217041016, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.8191, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 22.895992279052734, + "learning_rate": 1e-05, + "loss": 2.0596, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 30.55072593688965, + "learning_rate": 9.948979591836737e-06, + "loss": 1.8904, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 26.542705535888672, + "learning_rate": 9.89795918367347e-06, + "loss": 1.9683, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 39.81034851074219, + "learning_rate": 9.846938775510205e-06, + "loss": 1.9726, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 22.0065860748291, + "learning_rate": 9.795918367346939e-06, + "loss": 2.0575, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 19.012041091918945, + "learning_rate": 9.744897959183674e-06, + "loss": 1.8431, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 39.699974060058594, + "learning_rate": 9.693877551020408e-06, + "loss": 1.8231, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 21.391319274902344, + "learning_rate": 9.642857142857144e-06, + "loss": 1.7939, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 25.8063907623291, + "learning_rate": 9.591836734693878e-06, + "loss": 2.0366, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 20.598569869995117, + "learning_rate": 9.540816326530612e-06, + "loss": 1.8323, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 29.391401290893555, + "learning_rate": 9.489795918367348e-06, + "loss": 2.0052, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 24.39499855041504, + "learning_rate": 9.438775510204082e-06, + "loss": 1.8461, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 24.16887092590332, + "learning_rate": 9.387755102040818e-06, + "loss": 1.9404, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 24.577871322631836, + "learning_rate": 9.336734693877552e-06, + "loss": 1.9202, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 26.117361068725586, + "learning_rate": 9.285714285714288e-06, + "loss": 1.921, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 22.586837768554688, + "learning_rate": 9.234693877551022e-06, + "loss": 1.9692, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 18.438722610473633, + "learning_rate": 9.183673469387756e-06, + "loss": 1.9496, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 22.94545555114746, + "learning_rate": 9.13265306122449e-06, + "loss": 1.991, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 28.664562225341797, + "learning_rate": 9.081632653061225e-06, + "loss": 1.8352, + "step": 220 + }, + { + "epoch": 0.5573770491803278, + "grad_norm": 25.63576316833496, + "learning_rate": 9.03061224489796e-06, + "loss": 1.9399, + "step": 221 + }, + { + "epoch": 0.5598991172761665, + "grad_norm": 21.650251388549805, + "learning_rate": 8.979591836734695e-06, + "loss": 1.9565, + "step": 222 + }, + { + "epoch": 0.562421185372005, + "grad_norm": 29.605735778808594, + "learning_rate": 8.92857142857143e-06, + "loss": 1.729, + "step": 223 + }, + { + "epoch": 0.5649432534678437, + "grad_norm": 23.98230743408203, + "learning_rate": 8.877551020408163e-06, + "loss": 1.9399, + "step": 224 + }, + { + "epoch": 0.5674653215636822, + "grad_norm": 20.37510108947754, + "learning_rate": 8.826530612244899e-06, + "loss": 1.7322, + "step": 225 + }, + { + "epoch": 0.5699873896595208, + "grad_norm": 25.876188278198242, + "learning_rate": 8.775510204081633e-06, + "loss": 1.9444, + "step": 226 + }, + { + "epoch": 0.5725094577553594, + "grad_norm": 32.07249069213867, + "learning_rate": 8.724489795918369e-06, + "loss": 1.9364, + "step": 227 + }, + { + "epoch": 0.575031525851198, + "grad_norm": 28.014524459838867, + "learning_rate": 8.673469387755103e-06, + "loss": 1.757, + "step": 228 + }, + { + "epoch": 0.5775535939470365, + "grad_norm": 30.82647132873535, + "learning_rate": 8.622448979591837e-06, + "loss": 1.9067, + "step": 229 + }, + { + "epoch": 0.5800756620428752, + "grad_norm": 30.651660919189453, + "learning_rate": 8.571428571428571e-06, + "loss": 1.9906, + "step": 230 + }, + { + "epoch": 0.5825977301387137, + "grad_norm": 25.239904403686523, + "learning_rate": 8.520408163265307e-06, + "loss": 1.8914, + "step": 231 + }, + { + "epoch": 0.5851197982345523, + "grad_norm": 21.33747673034668, + "learning_rate": 8.469387755102042e-06, + "loss": 1.9999, + "step": 232 + }, + { + "epoch": 0.587641866330391, + "grad_norm": 25.255064010620117, + "learning_rate": 8.418367346938776e-06, + "loss": 1.8941, + "step": 233 + }, + { + "epoch": 0.5901639344262295, + "grad_norm": 24.443973541259766, + "learning_rate": 8.36734693877551e-06, + "loss": 1.7679, + "step": 234 + }, + { + "epoch": 0.592686002522068, + "grad_norm": 25.473894119262695, + "learning_rate": 8.316326530612246e-06, + "loss": 1.7876, + "step": 235 + }, + { + "epoch": 0.5952080706179067, + "grad_norm": 26.28467559814453, + "learning_rate": 8.26530612244898e-06, + "loss": 1.6761, + "step": 236 + }, + { + "epoch": 0.5977301387137453, + "grad_norm": 24.488052368164062, + "learning_rate": 8.214285714285714e-06, + "loss": 2.0022, + "step": 237 + }, + { + "epoch": 0.6002522068095839, + "grad_norm": 30.074064254760742, + "learning_rate": 8.16326530612245e-06, + "loss": 1.7747, + "step": 238 + }, + { + "epoch": 0.6027742749054225, + "grad_norm": 23.73440170288086, + "learning_rate": 8.112244897959184e-06, + "loss": 1.8468, + "step": 239 + }, + { + "epoch": 0.605296343001261, + "grad_norm": 22.338869094848633, + "learning_rate": 8.06122448979592e-06, + "loss": 1.8611, + "step": 240 + }, + { + "epoch": 0.6078184110970997, + "grad_norm": 24.844266891479492, + "learning_rate": 8.010204081632654e-06, + "loss": 1.9285, + "step": 241 + }, + { + "epoch": 0.6103404791929382, + "grad_norm": 29.65668487548828, + "learning_rate": 7.959183673469388e-06, + "loss": 1.935, + "step": 242 + }, + { + "epoch": 0.6128625472887768, + "grad_norm": 26.01723289489746, + "learning_rate": 7.908163265306124e-06, + "loss": 1.7587, + "step": 243 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 27.04817771911621, + "learning_rate": 7.857142857142858e-06, + "loss": 1.8878, + "step": 244 + }, + { + "epoch": 0.617906683480454, + "grad_norm": 36.23786163330078, + "learning_rate": 7.806122448979593e-06, + "loss": 1.992, + "step": 245 + }, + { + "epoch": 0.6204287515762925, + "grad_norm": 19.283294677734375, + "learning_rate": 7.755102040816327e-06, + "loss": 1.8066, + "step": 246 + }, + { + "epoch": 0.6229508196721312, + "grad_norm": 24.24143409729004, + "learning_rate": 7.704081632653061e-06, + "loss": 1.8899, + "step": 247 + }, + { + "epoch": 0.6254728877679697, + "grad_norm": 25.59832763671875, + "learning_rate": 7.653061224489796e-06, + "loss": 1.9601, + "step": 248 + }, + { + "epoch": 0.6279949558638083, + "grad_norm": 27.195640563964844, + "learning_rate": 7.602040816326531e-06, + "loss": 1.9561, + "step": 249 + }, + { + "epoch": 0.6305170239596469, + "grad_norm": 27.854570388793945, + "learning_rate": 7.551020408163265e-06, + "loss": 1.8781, + "step": 250 + }, + { + "epoch": 0.6330390920554855, + "grad_norm": 25.715761184692383, + "learning_rate": 7.500000000000001e-06, + "loss": 1.8542, + "step": 251 + }, + { + "epoch": 0.6355611601513241, + "grad_norm": 22.562984466552734, + "learning_rate": 7.448979591836736e-06, + "loss": 1.7681, + "step": 252 + }, + { + "epoch": 0.6380832282471627, + "grad_norm": 20.540348052978516, + "learning_rate": 7.39795918367347e-06, + "loss": 1.9617, + "step": 253 + }, + { + "epoch": 0.6406052963430012, + "grad_norm": 24.610937118530273, + "learning_rate": 7.346938775510205e-06, + "loss": 1.9694, + "step": 254 + }, + { + "epoch": 0.6431273644388399, + "grad_norm": 27.93538475036621, + "learning_rate": 7.295918367346939e-06, + "loss": 1.8858, + "step": 255 + }, + { + "epoch": 0.6456494325346784, + "grad_norm": 31.466445922851562, + "learning_rate": 7.244897959183675e-06, + "loss": 1.8252, + "step": 256 + }, + { + "epoch": 0.648171500630517, + "grad_norm": 26.276226043701172, + "learning_rate": 7.193877551020409e-06, + "loss": 1.8865, + "step": 257 + }, + { + "epoch": 0.6506935687263556, + "grad_norm": 22.52095603942871, + "learning_rate": 7.1428571428571436e-06, + "loss": 1.7649, + "step": 258 + }, + { + "epoch": 0.6532156368221942, + "grad_norm": 20.15144157409668, + "learning_rate": 7.091836734693878e-06, + "loss": 2.0158, + "step": 259 + }, + { + "epoch": 0.6557377049180327, + "grad_norm": 26.405349731445312, + "learning_rate": 7.0408163265306125e-06, + "loss": 1.8932, + "step": 260 + }, + { + "epoch": 0.6582597730138714, + "grad_norm": 32.94384765625, + "learning_rate": 6.989795918367348e-06, + "loss": 1.7795, + "step": 261 + }, + { + "epoch": 0.6607818411097099, + "grad_norm": 23.109092712402344, + "learning_rate": 6.938775510204082e-06, + "loss": 1.8383, + "step": 262 + }, + { + "epoch": 0.6633039092055486, + "grad_norm": 21.75737190246582, + "learning_rate": 6.887755102040817e-06, + "loss": 1.8727, + "step": 263 + }, + { + "epoch": 0.6658259773013872, + "grad_norm": 22.96916389465332, + "learning_rate": 6.836734693877551e-06, + "loss": 1.8544, + "step": 264 + }, + { + "epoch": 0.6683480453972257, + "grad_norm": 25.62445640563965, + "learning_rate": 6.785714285714287e-06, + "loss": 1.7503, + "step": 265 + }, + { + "epoch": 0.6708701134930644, + "grad_norm": 25.430530548095703, + "learning_rate": 6.734693877551021e-06, + "loss": 1.7938, + "step": 266 + }, + { + "epoch": 0.6733921815889029, + "grad_norm": 26.462881088256836, + "learning_rate": 6.683673469387756e-06, + "loss": 1.8284, + "step": 267 + }, + { + "epoch": 0.6759142496847415, + "grad_norm": 31.45004653930664, + "learning_rate": 6.63265306122449e-06, + "loss": 2.0328, + "step": 268 + }, + { + "epoch": 0.6784363177805801, + "grad_norm": 30.525737762451172, + "learning_rate": 6.581632653061225e-06, + "loss": 1.8192, + "step": 269 + }, + { + "epoch": 0.6809583858764187, + "grad_norm": 25.705707550048828, + "learning_rate": 6.530612244897959e-06, + "loss": 1.8533, + "step": 270 + }, + { + "epoch": 0.6834804539722572, + "grad_norm": 39.90187454223633, + "learning_rate": 6.4795918367346946e-06, + "loss": 1.9483, + "step": 271 + }, + { + "epoch": 0.6860025220680959, + "grad_norm": 28.0180721282959, + "learning_rate": 6.4285714285714295e-06, + "loss": 1.8132, + "step": 272 + }, + { + "epoch": 0.6885245901639344, + "grad_norm": 34.821372985839844, + "learning_rate": 6.3775510204081635e-06, + "loss": 1.9599, + "step": 273 + }, + { + "epoch": 0.691046658259773, + "grad_norm": 24.018394470214844, + "learning_rate": 6.326530612244899e-06, + "loss": 1.9248, + "step": 274 + }, + { + "epoch": 0.6935687263556116, + "grad_norm": 24.074344635009766, + "learning_rate": 6.275510204081633e-06, + "loss": 2.0148, + "step": 275 + }, + { + "epoch": 0.6960907944514502, + "grad_norm": 31.1939754486084, + "learning_rate": 6.224489795918368e-06, + "loss": 1.8959, + "step": 276 + }, + { + "epoch": 0.6986128625472888, + "grad_norm": 25.481502532958984, + "learning_rate": 6.173469387755102e-06, + "loss": 1.9832, + "step": 277 + }, + { + "epoch": 0.7011349306431274, + "grad_norm": 29.6664981842041, + "learning_rate": 6.122448979591837e-06, + "loss": 1.9222, + "step": 278 + }, + { + "epoch": 0.7036569987389659, + "grad_norm": 26.30698585510254, + "learning_rate": 6.071428571428571e-06, + "loss": 1.9897, + "step": 279 + }, + { + "epoch": 0.7061790668348046, + "grad_norm": 31.827558517456055, + "learning_rate": 6.020408163265307e-06, + "loss": 1.8615, + "step": 280 + }, + { + "epoch": 0.7087011349306431, + "grad_norm": 24.80223846435547, + "learning_rate": 5.969387755102042e-06, + "loss": 1.9579, + "step": 281 + }, + { + "epoch": 0.7112232030264817, + "grad_norm": 36.134700775146484, + "learning_rate": 5.918367346938776e-06, + "loss": 1.723, + "step": 282 + }, + { + "epoch": 0.7137452711223203, + "grad_norm": 30.388233184814453, + "learning_rate": 5.867346938775511e-06, + "loss": 1.9736, + "step": 283 + }, + { + "epoch": 0.7162673392181589, + "grad_norm": 32.231563568115234, + "learning_rate": 5.816326530612246e-06, + "loss": 1.9228, + "step": 284 + }, + { + "epoch": 0.7187894073139974, + "grad_norm": 38.05869674682617, + "learning_rate": 5.7653061224489805e-06, + "loss": 1.9159, + "step": 285 + }, + { + "epoch": 0.7213114754098361, + "grad_norm": 27.256147384643555, + "learning_rate": 5.7142857142857145e-06, + "loss": 1.8072, + "step": 286 + }, + { + "epoch": 0.7238335435056746, + "grad_norm": 25.67181396484375, + "learning_rate": 5.663265306122449e-06, + "loss": 1.8633, + "step": 287 + }, + { + "epoch": 0.7263556116015133, + "grad_norm": 31.8681697845459, + "learning_rate": 5.6122448979591834e-06, + "loss": 1.9822, + "step": 288 + }, + { + "epoch": 0.7288776796973518, + "grad_norm": 32.85325241088867, + "learning_rate": 5.561224489795919e-06, + "loss": 1.8166, + "step": 289 + }, + { + "epoch": 0.7313997477931904, + "grad_norm": 35.64312744140625, + "learning_rate": 5.510204081632653e-06, + "loss": 1.7166, + "step": 290 + }, + { + "epoch": 0.733921815889029, + "grad_norm": 24.276235580444336, + "learning_rate": 5.459183673469388e-06, + "loss": 1.7593, + "step": 291 + }, + { + "epoch": 0.7364438839848676, + "grad_norm": 29.371950149536133, + "learning_rate": 5.408163265306123e-06, + "loss": 1.8124, + "step": 292 + }, + { + "epoch": 0.7389659520807061, + "grad_norm": 23.76220703125, + "learning_rate": 5.357142857142857e-06, + "loss": 1.7775, + "step": 293 + }, + { + "epoch": 0.7414880201765448, + "grad_norm": 37.103050231933594, + "learning_rate": 5.306122448979593e-06, + "loss": 1.9253, + "step": 294 + }, + { + "epoch": 0.7440100882723834, + "grad_norm": 20.0811767578125, + "learning_rate": 5.255102040816327e-06, + "loss": 1.8711, + "step": 295 + }, + { + "epoch": 0.7465321563682219, + "grad_norm": 35.33123016357422, + "learning_rate": 5.204081632653062e-06, + "loss": 1.8764, + "step": 296 + }, + { + "epoch": 0.7490542244640606, + "grad_norm": 31.880672454833984, + "learning_rate": 5.153061224489796e-06, + "loss": 1.8929, + "step": 297 + }, + { + "epoch": 0.7515762925598991, + "grad_norm": 21.682334899902344, + "learning_rate": 5.1020408163265315e-06, + "loss": 2.0377, + "step": 298 + }, + { + "epoch": 0.7540983606557377, + "grad_norm": 34.68608474731445, + "learning_rate": 5.0510204081632655e-06, + "loss": 1.9341, + "step": 299 + }, + { + "epoch": 0.7566204287515763, + "grad_norm": 25.59632110595703, + "learning_rate": 5e-06, + "loss": 1.8264, + "step": 300 + }, + { + "epoch": 0.7566204287515763, + "eval_loss": 1.8502724170684814, + "eval_runtime": 6.6208, + "eval_samples_per_second": 106.483, + "eval_steps_per_second": 53.317, + "step": 300 + }, + { + "epoch": 0.7591424968474149, + "grad_norm": 33.780616760253906, + "learning_rate": 4.948979591836735e-06, + "loss": 1.8315, + "step": 301 + }, + { + "epoch": 0.7616645649432535, + "grad_norm": 23.005069732666016, + "learning_rate": 4.897959183673469e-06, + "loss": 1.8434, + "step": 302 + }, + { + "epoch": 0.7641866330390921, + "grad_norm": 27.338787078857422, + "learning_rate": 4.846938775510204e-06, + "loss": 1.9102, + "step": 303 + }, + { + "epoch": 0.7667087011349306, + "grad_norm": 26.87493133544922, + "learning_rate": 4.795918367346939e-06, + "loss": 1.8408, + "step": 304 + }, + { + "epoch": 0.7692307692307693, + "grad_norm": 33.59117126464844, + "learning_rate": 4.744897959183674e-06, + "loss": 1.8633, + "step": 305 + }, + { + "epoch": 0.7717528373266078, + "grad_norm": 38.98092269897461, + "learning_rate": 4.693877551020409e-06, + "loss": 1.8187, + "step": 306 + }, + { + "epoch": 0.7742749054224464, + "grad_norm": 28.7203369140625, + "learning_rate": 4.642857142857144e-06, + "loss": 1.8425, + "step": 307 + }, + { + "epoch": 0.776796973518285, + "grad_norm": 30.91414451599121, + "learning_rate": 4.591836734693878e-06, + "loss": 1.8526, + "step": 308 + }, + { + "epoch": 0.7793190416141236, + "grad_norm": 29.04154396057129, + "learning_rate": 4.540816326530613e-06, + "loss": 1.8913, + "step": 309 + }, + { + "epoch": 0.7818411097099621, + "grad_norm": 29.638099670410156, + "learning_rate": 4.489795918367348e-06, + "loss": 1.8736, + "step": 310 + }, + { + "epoch": 0.7843631778058008, + "grad_norm": 21.630523681640625, + "learning_rate": 4.438775510204082e-06, + "loss": 1.7407, + "step": 311 + }, + { + "epoch": 0.7868852459016393, + "grad_norm": 28.39365577697754, + "learning_rate": 4.3877551020408165e-06, + "loss": 1.7503, + "step": 312 + }, + { + "epoch": 0.7894073139974779, + "grad_norm": 30.86245346069336, + "learning_rate": 4.336734693877551e-06, + "loss": 1.885, + "step": 313 + }, + { + "epoch": 0.7919293820933165, + "grad_norm": 20.29497718811035, + "learning_rate": 4.2857142857142855e-06, + "loss": 1.8725, + "step": 314 + }, + { + "epoch": 0.7944514501891551, + "grad_norm": 20.04092025756836, + "learning_rate": 4.234693877551021e-06, + "loss": 1.949, + "step": 315 + }, + { + "epoch": 0.7969735182849937, + "grad_norm": 26.88593101501465, + "learning_rate": 4.183673469387755e-06, + "loss": 1.8807, + "step": 316 + }, + { + "epoch": 0.7994955863808323, + "grad_norm": 28.06767463684082, + "learning_rate": 4.13265306122449e-06, + "loss": 1.9135, + "step": 317 + }, + { + "epoch": 0.8020176544766708, + "grad_norm": 35.69569778442383, + "learning_rate": 4.081632653061225e-06, + "loss": 1.8257, + "step": 318 + }, + { + "epoch": 0.8045397225725095, + "grad_norm": 29.278770446777344, + "learning_rate": 4.03061224489796e-06, + "loss": 1.8459, + "step": 319 + }, + { + "epoch": 0.807061790668348, + "grad_norm": 28.043975830078125, + "learning_rate": 3.979591836734694e-06, + "loss": 1.8821, + "step": 320 + }, + { + "epoch": 0.8095838587641866, + "grad_norm": 24.060317993164062, + "learning_rate": 3.928571428571429e-06, + "loss": 1.853, + "step": 321 + }, + { + "epoch": 0.8121059268600253, + "grad_norm": 23.929243087768555, + "learning_rate": 3.877551020408164e-06, + "loss": 1.9249, + "step": 322 + }, + { + "epoch": 0.8146279949558638, + "grad_norm": 37.94782257080078, + "learning_rate": 3.826530612244898e-06, + "loss": 1.9666, + "step": 323 + }, + { + "epoch": 0.8171500630517023, + "grad_norm": 26.99836540222168, + "learning_rate": 3.7755102040816327e-06, + "loss": 1.8464, + "step": 324 + }, + { + "epoch": 0.819672131147541, + "grad_norm": 26.87177085876465, + "learning_rate": 3.724489795918368e-06, + "loss": 1.9178, + "step": 325 + }, + { + "epoch": 0.8221941992433796, + "grad_norm": 36.669212341308594, + "learning_rate": 3.6734693877551024e-06, + "loss": 1.8469, + "step": 326 + }, + { + "epoch": 0.8247162673392182, + "grad_norm": 26.681806564331055, + "learning_rate": 3.6224489795918373e-06, + "loss": 1.9433, + "step": 327 + }, + { + "epoch": 0.8272383354350568, + "grad_norm": 27.62560272216797, + "learning_rate": 3.5714285714285718e-06, + "loss": 1.8022, + "step": 328 + }, + { + "epoch": 0.8297604035308953, + "grad_norm": 31.047653198242188, + "learning_rate": 3.5204081632653062e-06, + "loss": 1.9952, + "step": 329 + }, + { + "epoch": 0.832282471626734, + "grad_norm": 22.605276107788086, + "learning_rate": 3.469387755102041e-06, + "loss": 1.8992, + "step": 330 + }, + { + "epoch": 0.8348045397225725, + "grad_norm": 27.210397720336914, + "learning_rate": 3.4183673469387756e-06, + "loss": 2.0905, + "step": 331 + }, + { + "epoch": 0.8373266078184111, + "grad_norm": 19.93979835510254, + "learning_rate": 3.3673469387755105e-06, + "loss": 1.8109, + "step": 332 + }, + { + "epoch": 0.8398486759142497, + "grad_norm": 28.999895095825195, + "learning_rate": 3.316326530612245e-06, + "loss": 1.8978, + "step": 333 + }, + { + "epoch": 0.8423707440100883, + "grad_norm": 28.620771408081055, + "learning_rate": 3.2653061224489794e-06, + "loss": 2.0414, + "step": 334 + }, + { + "epoch": 0.8448928121059268, + "grad_norm": 28.657215118408203, + "learning_rate": 3.2142857142857147e-06, + "loss": 1.8445, + "step": 335 + }, + { + "epoch": 0.8474148802017655, + "grad_norm": 33.65087127685547, + "learning_rate": 3.1632653061224496e-06, + "loss": 1.8559, + "step": 336 + }, + { + "epoch": 0.849936948297604, + "grad_norm": 28.926881790161133, + "learning_rate": 3.112244897959184e-06, + "loss": 1.9608, + "step": 337 + }, + { + "epoch": 0.8524590163934426, + "grad_norm": 24.56757926940918, + "learning_rate": 3.0612244897959185e-06, + "loss": 1.8948, + "step": 338 + }, + { + "epoch": 0.8549810844892812, + "grad_norm": 26.79437828063965, + "learning_rate": 3.0102040816326534e-06, + "loss": 1.7803, + "step": 339 + }, + { + "epoch": 0.8575031525851198, + "grad_norm": 22.516748428344727, + "learning_rate": 2.959183673469388e-06, + "loss": 1.9039, + "step": 340 + }, + { + "epoch": 0.8600252206809584, + "grad_norm": 28.779983520507812, + "learning_rate": 2.908163265306123e-06, + "loss": 1.9083, + "step": 341 + }, + { + "epoch": 0.862547288776797, + "grad_norm": 23.90164566040039, + "learning_rate": 2.8571428571428573e-06, + "loss": 1.7973, + "step": 342 + }, + { + "epoch": 0.8650693568726355, + "grad_norm": 25.098011016845703, + "learning_rate": 2.8061224489795917e-06, + "loss": 1.8596, + "step": 343 + }, + { + "epoch": 0.8675914249684742, + "grad_norm": 36.381954193115234, + "learning_rate": 2.7551020408163266e-06, + "loss": 1.8069, + "step": 344 + }, + { + "epoch": 0.8701134930643127, + "grad_norm": 33.54197311401367, + "learning_rate": 2.7040816326530615e-06, + "loss": 1.9138, + "step": 345 + }, + { + "epoch": 0.8726355611601513, + "grad_norm": 27.836380004882812, + "learning_rate": 2.6530612244897964e-06, + "loss": 1.8806, + "step": 346 + }, + { + "epoch": 0.8751576292559899, + "grad_norm": 29.386459350585938, + "learning_rate": 2.602040816326531e-06, + "loss": 1.9654, + "step": 347 + }, + { + "epoch": 0.8776796973518285, + "grad_norm": 25.446636199951172, + "learning_rate": 2.5510204081632657e-06, + "loss": 1.8723, + "step": 348 + }, + { + "epoch": 0.880201765447667, + "grad_norm": 23.149751663208008, + "learning_rate": 2.5e-06, + "loss": 1.7802, + "step": 349 + }, + { + "epoch": 0.8827238335435057, + "grad_norm": 24.944000244140625, + "learning_rate": 2.4489795918367347e-06, + "loss": 1.9328, + "step": 350 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5111161187844096.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-350/training_args.bin b/checkpoints/checkpoint-350/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..866f8d7e39db21daf30a11de7c14a3ccb8f2e9aa --- /dev/null +++ b/checkpoints/checkpoint-350/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:591380c2089627b1aa728bfd52abdb50afeaafc7a7f48353dede4e443fc370d0 +size 5969 diff --git a/checkpoints/checkpoint-360/README.md b/checkpoints/checkpoint-360/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b974a935220f493e52a3da346206a652479b4735 --- /dev/null +++ b/checkpoints/checkpoint-360/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.2-1B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-360/adapter_config.json b/checkpoints/checkpoint-360/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a0d31afa2216e2d6d2237fc15eef0c8c03ca674 --- /dev/null +++ b/checkpoints/checkpoint-360/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.2-1B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-360/adapter_model.safetensors b/checkpoints/checkpoint-360/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3de628bb62e33746d8dd023b52b688e9167e60f5 --- /dev/null +++ b/checkpoints/checkpoint-360/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2b1501139c4934c63a71024366e4268a6cdf4a6b90c7f0ee3cd9937e05a087c +size 41248 diff --git a/checkpoints/checkpoint-360/chat_template.jinja b/checkpoints/checkpoint-360/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..1bad6a0f648dccdbec523ca79ba90fbcfc806af0 --- /dev/null +++ b/checkpoints/checkpoint-360/chat_template.jinja @@ -0,0 +1,93 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- if strftime_now is defined %} + {%- set date_string = strftime_now("%d %b %Y") %} + {%- else %} + {%- set date_string = "26 Jul 2024" %} + {%- endif %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {{- "<|eot_id|>" }} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-360/optimizer.pt b/checkpoints/checkpoint-360/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..98548a40a5b0bf7107df7f0e4249c5ee489f8538 --- /dev/null +++ b/checkpoints/checkpoint-360/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e58187113af2d3dbd30e44793a3fbefdd6daaae454d451327f8a65a40ac0b19 +size 38953 diff --git a/checkpoints/checkpoint-360/rng_state.pth b/checkpoints/checkpoint-360/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c9d202f3cabb995444284e316dca48902b67f89a --- /dev/null +++ b/checkpoints/checkpoint-360/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f646d2c44059721e44eb93fd2af0841401f183d6ff9b2e067649f1f58cce0f09 +size 14581 diff --git a/checkpoints/checkpoint-360/scheduler.pt b/checkpoints/checkpoint-360/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b7337b5f20ad00fff725af21861c92cb7ad98990 --- /dev/null +++ b/checkpoints/checkpoint-360/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a6a4e5ddd70e1348d075c7001b166e01fc5e82711e4e01a5558841be28d7c62 +size 1465 diff --git a/checkpoints/checkpoint-360/special_tokens_map.json b/checkpoints/checkpoint-360/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-360/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-360/tokenizer.json b/checkpoints/checkpoint-360/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-360/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-360/tokenizer_config.json b/checkpoints/checkpoint-360/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-360/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-360/trainer_state.json b/checkpoints/checkpoint-360/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ac85e2b4e73f6432d6c41c3dca25881be1794b77 --- /dev/null +++ b/checkpoints/checkpoint-360/trainer_state.json @@ -0,0 +1,2578 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9079445145018915, + "eval_steps": 100, + "global_step": 360, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 25.350475311279297, + "learning_rate": 0.0, + "loss": 2.5568, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 24.538068771362305, + "learning_rate": 4.000000000000001e-06, + "loss": 2.7748, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 23.780784606933594, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5911, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 24.780380249023438, + "learning_rate": 1.2e-05, + "loss": 2.8427, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 22.699949264526367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.709, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 22.106008529663086, + "learning_rate": 2e-05, + "loss": 2.5854, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 22.497045516967773, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.5427, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 27.103275299072266, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.6599, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 21.081985473632812, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.5145, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 25.964981079101562, + "learning_rate": 1.979591836734694e-05, + "loss": 2.4247, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 25.353195190429688, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.5092, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 18.94191551208496, + "learning_rate": 1.969387755102041e-05, + "loss": 2.4335, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 23.60140037536621, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.544, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 24.298965454101562, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.4987, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.745506286621094, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.4985, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 22.54330062866211, + "learning_rate": 1.948979591836735e-05, + "loss": 2.6892, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 21.46229362487793, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.3998, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 20.54530143737793, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.4244, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 18.8839111328125, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.3911, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.924652099609375, + "learning_rate": 1.928571428571429e-05, + "loss": 2.3588, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.996627807617188, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.3727, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 18.584613800048828, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2974, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 14.309200286865234, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.4843, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.074164390563965, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.4043, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 13.610542297363281, + "learning_rate": 1.903061224489796e-05, + "loss": 2.3762, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 15.666613578796387, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.3249, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 14.475164413452148, + "learning_rate": 1.892857142857143e-05, + "loss": 2.3317, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 16.231687545776367, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.5064, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 16.8968563079834, + "learning_rate": 1.88265306122449e-05, + "loss": 2.3932, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 17.74305534362793, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.3329, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 16.41620445251465, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.3982, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 17.965959548950195, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.4227, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 19.92589569091797, + "learning_rate": 1.862244897959184e-05, + "loss": 2.5255, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 20.62932586669922, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.1816, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 18.360614776611328, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.2827, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 19.199546813964844, + "learning_rate": 1.8469387755102043e-05, + "loss": 2.1498, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 22.727521896362305, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.3811, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 19.80649757385254, + "learning_rate": 1.836734693877551e-05, + "loss": 2.2342, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.24563217163086, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.2287, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 25.384042739868164, + "learning_rate": 1.826530612244898e-05, + "loss": 2.1259, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 23.417089462280273, + "learning_rate": 1.8214285714285715e-05, + "loss": 2.0858, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 27.639497756958008, + "learning_rate": 1.816326530612245e-05, + "loss": 2.2243, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 27.390850067138672, + "learning_rate": 1.8112244897959187e-05, + "loss": 2.1314, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 27.956937789916992, + "learning_rate": 1.806122448979592e-05, + "loss": 2.1755, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 32.09632873535156, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.2365, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 33.84647750854492, + "learning_rate": 1.795918367346939e-05, + "loss": 2.1671, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 32.027130126953125, + "learning_rate": 1.7908163265306123e-05, + "loss": 2.09, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 35.423587799072266, + "learning_rate": 1.785714285714286e-05, + "loss": 2.2479, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 31.041240692138672, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9687, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 28.790103912353516, + "learning_rate": 1.7755102040816327e-05, + "loss": 2.1428, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.089313507080078, + "learning_rate": 1.7704081632653062e-05, + "loss": 2.0673, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 26.493867874145508, + "learning_rate": 1.7653061224489798e-05, + "loss": 2.0814, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 19.993173599243164, + "learning_rate": 1.760204081632653e-05, + "loss": 2.005, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 21.89765167236328, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.2262, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 23.22844123840332, + "learning_rate": 1.7500000000000002e-05, + "loss": 2.0208, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 15.864526748657227, + "learning_rate": 1.7448979591836738e-05, + "loss": 2.0153, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 21.451187133789062, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.1386, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 18.089811325073242, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.9517, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 24.029157638549805, + "learning_rate": 1.729591836734694e-05, + "loss": 1.9719, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 18.722776412963867, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0623, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 20.211933135986328, + "learning_rate": 1.719387755102041e-05, + "loss": 2.0081, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 17.61188507080078, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.8484, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 20.118955612182617, + "learning_rate": 1.7091836734693878e-05, + "loss": 2.0799, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 17.271841049194336, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.9832, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 19.521392822265625, + "learning_rate": 1.698979591836735e-05, + "loss": 1.9129, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 22.660900115966797, + "learning_rate": 1.6938775510204085e-05, + "loss": 2.118, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 17.332427978515625, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.9632, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 22.42765998840332, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.954, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 23.6208553314209, + "learning_rate": 1.678571428571429e-05, + "loss": 1.9917, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 19.78505516052246, + "learning_rate": 1.673469387755102e-05, + "loss": 1.7964, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 19.453041076660156, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.9587, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 24.731407165527344, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.9945, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 20.977611541748047, + "learning_rate": 1.6581632653061225e-05, + "loss": 2.0617, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 22.959585189819336, + "learning_rate": 1.653061224489796e-05, + "loss": 1.98, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 21.952653884887695, + "learning_rate": 1.6479591836734696e-05, + "loss": 2.1094, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 22.320383071899414, + "learning_rate": 1.642857142857143e-05, + "loss": 1.8418, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 24.375411987304688, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.8428, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 19.64323616027832, + "learning_rate": 1.63265306122449e-05, + "loss": 1.9194, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 22.459064483642578, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.6649, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 36.789764404296875, + "learning_rate": 1.6224489795918368e-05, + "loss": 2.0131, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 22.109119415283203, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.9603, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 19.196834564208984, + "learning_rate": 1.612244897959184e-05, + "loss": 2.0538, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 26.870800018310547, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.9168, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 35.190696716308594, + "learning_rate": 1.6020408163265308e-05, + "loss": 2.0149, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 19.963472366333008, + "learning_rate": 1.596938775510204e-05, + "loss": 1.7871, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 20.292407989501953, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.944, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 20.55329132080078, + "learning_rate": 1.586734693877551e-05, + "loss": 2.0175, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 17.27350616455078, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.9308, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 22.471134185791016, + "learning_rate": 1.576530612244898e-05, + "loss": 1.9221, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 25.098316192626953, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.9359, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 25.125213623046875, + "learning_rate": 1.566326530612245e-05, + "loss": 2.0087, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 20.038599014282227, + "learning_rate": 1.5612244897959187e-05, + "loss": 2.0939, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 19.016841888427734, + "learning_rate": 1.556122448979592e-05, + "loss": 2.0183, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 21.97820472717285, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.8239, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 25.578901290893555, + "learning_rate": 1.545918367346939e-05, + "loss": 1.9388, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 23.74614143371582, + "learning_rate": 1.5408163265306123e-05, + "loss": 2.0492, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 22.203304290771484, + "learning_rate": 1.535714285714286e-05, + "loss": 1.941, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 21.39324188232422, + "learning_rate": 1.530612244897959e-05, + "loss": 1.9042, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 18.99315643310547, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.88, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 24.22341537475586, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.8147, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.8966256380081177, + "eval_runtime": 6.9787, + "eval_samples_per_second": 101.022, + "eval_steps_per_second": 50.583, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 18.296152114868164, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.8605, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 26.404766082763672, + "learning_rate": 1.510204081632653e-05, + "loss": 2.1195, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 19.187122344970703, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.9284, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 20.79934310913086, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.725, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 23.833288192749023, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.9215, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 22.301727294921875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.8728, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 23.685596466064453, + "learning_rate": 1.4846938775510204e-05, + "loss": 2.0482, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 18.969186782836914, + "learning_rate": 1.479591836734694e-05, + "loss": 1.8724, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 23.994483947753906, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.9542, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 16.84621238708496, + "learning_rate": 1.469387755102041e-05, + "loss": 1.9703, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 23.411087036132812, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.9836, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 29.55487632751465, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.9124, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 32.28921127319336, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.8566, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 24.007558822631836, + "learning_rate": 1.448979591836735e-05, + "loss": 1.8296, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 26.753524780273438, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.7181, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 22.49270248413086, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.8741, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 28.006656646728516, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.9151, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 17.606775283813477, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.9654, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 29.94802474975586, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.8849, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 28.27743148803711, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.8006, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 19.11652183532715, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.8539, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 24.255807876586914, + "learning_rate": 1.4081632653061225e-05, + "loss": 2.0162, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 22.508352279663086, + "learning_rate": 1.403061224489796e-05, + "loss": 1.858, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 27.028772354125977, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.8175, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 22.697704315185547, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.9789, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 31.604068756103516, + "learning_rate": 1.3877551020408165e-05, + "loss": 2.0039, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 27.71053695678711, + "learning_rate": 1.38265306122449e-05, + "loss": 1.9867, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 17.37586784362793, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.6931, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 20.28536605834961, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.9199, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 29.4377384185791, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.9322, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 17.703046798706055, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.8842, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 30.14008140563965, + "learning_rate": 1.3571428571428574e-05, + "loss": 2.1228, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 29.657262802124023, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.8929, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 18.243854522705078, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.8811, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 33.22247314453125, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.9814, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 26.413856506347656, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.9329, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 20.56089210510254, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.8944, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 19.480737686157227, + "learning_rate": 1.326530612244898e-05, + "loss": 1.9221, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 22.788074493408203, + "learning_rate": 1.3214285714285716e-05, + "loss": 2.0445, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 20.722291946411133, + "learning_rate": 1.316326530612245e-05, + "loss": 1.9998, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 25.190189361572266, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.8076, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 23.203886032104492, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.7821, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 25.32374382019043, + "learning_rate": 1.3010204081632653e-05, + "loss": 2.0356, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 28.798864364624023, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.8202, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 24.93810272216797, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.9237, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 36.78353500366211, + "learning_rate": 1.2857142857142859e-05, + "loss": 2.0019, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 28.510663986206055, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.9268, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 38.19087219238281, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.9366, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 20.796728134155273, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.8731, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 23.036758422851562, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.8835, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 27.058195114135742, + "learning_rate": 1.260204081632653e-05, + "loss": 1.8013, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 25.390460968017578, + "learning_rate": 1.2551020408163267e-05, + "loss": 2.0623, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 27.993654251098633, + "learning_rate": 1.25e-05, + "loss": 1.6895, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 24.15807342529297, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.9799, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 24.369815826416016, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.9687, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 24.572988510131836, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.8607, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 20.491390228271484, + "learning_rate": 1.229591836734694e-05, + "loss": 2.0677, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 25.128101348876953, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.9233, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 18.843276977539062, + "learning_rate": 1.219387755102041e-05, + "loss": 1.781, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 24.99994659423828, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.962, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 20.679218292236328, + "learning_rate": 1.2091836734693878e-05, + "loss": 2.0055, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 26.00550651550293, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.9761, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 33.80900192260742, + "learning_rate": 1.1989795918367348e-05, + "loss": 2.0502, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 25.639009475708008, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.9088, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 17.48627471923828, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.9359, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 23.16074562072754, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.8647, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 25.39946174621582, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.8523, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 25.8050537109375, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.8403, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 20.019033432006836, + "learning_rate": 1.1683673469387755e-05, + "loss": 2.0023, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 26.194847106933594, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.9429, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 21.064212799072266, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.8302, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 21.876129150390625, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.8881, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 33.61103439331055, + "learning_rate": 1.1479591836734697e-05, + "loss": 2.0497, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 27.204744338989258, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.8431, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 21.605751037597656, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.9149, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 30.307472229003906, + "learning_rate": 1.1326530612244899e-05, + "loss": 2.0313, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 23.69244384765625, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.8676, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 25.619901657104492, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.7905, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 28.0296573638916, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.8567, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 36.4359130859375, + "learning_rate": 1.1122448979591838e-05, + "loss": 2.115, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 26.91726303100586, + "learning_rate": 1.1071428571428572e-05, + "loss": 2.0642, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 23.085880279541016, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.9109, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 27.870641708374023, + "learning_rate": 1.096938775510204e-05, + "loss": 1.8999, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 32.0672607421875, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.904, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 28.879365921020508, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.7159, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 27.592771530151367, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.8561, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 27.412763595581055, + "learning_rate": 1.076530612244898e-05, + "loss": 1.9282, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 30.12356185913086, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.8726, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 39.9027214050293, + "learning_rate": 1.066326530612245e-05, + "loss": 1.7551, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 30.483945846557617, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.7643, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 26.00415802001953, + "learning_rate": 1.0561224489795918e-05, + "loss": 2.0552, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 23.03282356262207, + "learning_rate": 1.0510204081632654e-05, + "loss": 2.0052, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 33.653221130371094, + "learning_rate": 1.045918367346939e-05, + "loss": 1.7367, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 39.59351348876953, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.9726, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.77714920043945, + "learning_rate": 1.0357142857142859e-05, + "loss": 2.0906, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 33.194549560546875, + "learning_rate": 1.0306122448979591e-05, + "loss": 2.0742, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 25.10793685913086, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.9204, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 40.048404693603516, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.7775, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 26.085933685302734, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.9459, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 18.375, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.9536, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.8626214265823364, + "eval_runtime": 6.6508, + "eval_samples_per_second": 106.002, + "eval_steps_per_second": 53.076, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 33.858341217041016, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.8191, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 22.895992279052734, + "learning_rate": 1e-05, + "loss": 2.0596, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 30.55072593688965, + "learning_rate": 9.948979591836737e-06, + "loss": 1.8904, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 26.542705535888672, + "learning_rate": 9.89795918367347e-06, + "loss": 1.9683, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 39.81034851074219, + "learning_rate": 9.846938775510205e-06, + "loss": 1.9726, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 22.0065860748291, + "learning_rate": 9.795918367346939e-06, + "loss": 2.0575, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 19.012041091918945, + "learning_rate": 9.744897959183674e-06, + "loss": 1.8431, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 39.699974060058594, + "learning_rate": 9.693877551020408e-06, + "loss": 1.8231, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 21.391319274902344, + "learning_rate": 9.642857142857144e-06, + "loss": 1.7939, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 25.8063907623291, + "learning_rate": 9.591836734693878e-06, + "loss": 2.0366, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 20.598569869995117, + "learning_rate": 9.540816326530612e-06, + "loss": 1.8323, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 29.391401290893555, + "learning_rate": 9.489795918367348e-06, + "loss": 2.0052, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 24.39499855041504, + "learning_rate": 9.438775510204082e-06, + "loss": 1.8461, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 24.16887092590332, + "learning_rate": 9.387755102040818e-06, + "loss": 1.9404, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 24.577871322631836, + "learning_rate": 9.336734693877552e-06, + "loss": 1.9202, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 26.117361068725586, + "learning_rate": 9.285714285714288e-06, + "loss": 1.921, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 22.586837768554688, + "learning_rate": 9.234693877551022e-06, + "loss": 1.9692, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 18.438722610473633, + "learning_rate": 9.183673469387756e-06, + "loss": 1.9496, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 22.94545555114746, + "learning_rate": 9.13265306122449e-06, + "loss": 1.991, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 28.664562225341797, + "learning_rate": 9.081632653061225e-06, + "loss": 1.8352, + "step": 220 + }, + { + "epoch": 0.5573770491803278, + "grad_norm": 25.63576316833496, + "learning_rate": 9.03061224489796e-06, + "loss": 1.9399, + "step": 221 + }, + { + "epoch": 0.5598991172761665, + "grad_norm": 21.650251388549805, + "learning_rate": 8.979591836734695e-06, + "loss": 1.9565, + "step": 222 + }, + { + "epoch": 0.562421185372005, + "grad_norm": 29.605735778808594, + "learning_rate": 8.92857142857143e-06, + "loss": 1.729, + "step": 223 + }, + { + "epoch": 0.5649432534678437, + "grad_norm": 23.98230743408203, + "learning_rate": 8.877551020408163e-06, + "loss": 1.9399, + "step": 224 + }, + { + "epoch": 0.5674653215636822, + "grad_norm": 20.37510108947754, + "learning_rate": 8.826530612244899e-06, + "loss": 1.7322, + "step": 225 + }, + { + "epoch": 0.5699873896595208, + "grad_norm": 25.876188278198242, + "learning_rate": 8.775510204081633e-06, + "loss": 1.9444, + "step": 226 + }, + { + "epoch": 0.5725094577553594, + "grad_norm": 32.07249069213867, + "learning_rate": 8.724489795918369e-06, + "loss": 1.9364, + "step": 227 + }, + { + "epoch": 0.575031525851198, + "grad_norm": 28.014524459838867, + "learning_rate": 8.673469387755103e-06, + "loss": 1.757, + "step": 228 + }, + { + "epoch": 0.5775535939470365, + "grad_norm": 30.82647132873535, + "learning_rate": 8.622448979591837e-06, + "loss": 1.9067, + "step": 229 + }, + { + "epoch": 0.5800756620428752, + "grad_norm": 30.651660919189453, + "learning_rate": 8.571428571428571e-06, + "loss": 1.9906, + "step": 230 + }, + { + "epoch": 0.5825977301387137, + "grad_norm": 25.239904403686523, + "learning_rate": 8.520408163265307e-06, + "loss": 1.8914, + "step": 231 + }, + { + "epoch": 0.5851197982345523, + "grad_norm": 21.33747673034668, + "learning_rate": 8.469387755102042e-06, + "loss": 1.9999, + "step": 232 + }, + { + "epoch": 0.587641866330391, + "grad_norm": 25.255064010620117, + "learning_rate": 8.418367346938776e-06, + "loss": 1.8941, + "step": 233 + }, + { + "epoch": 0.5901639344262295, + "grad_norm": 24.443973541259766, + "learning_rate": 8.36734693877551e-06, + "loss": 1.7679, + "step": 234 + }, + { + "epoch": 0.592686002522068, + "grad_norm": 25.473894119262695, + "learning_rate": 8.316326530612246e-06, + "loss": 1.7876, + "step": 235 + }, + { + "epoch": 0.5952080706179067, + "grad_norm": 26.28467559814453, + "learning_rate": 8.26530612244898e-06, + "loss": 1.6761, + "step": 236 + }, + { + "epoch": 0.5977301387137453, + "grad_norm": 24.488052368164062, + "learning_rate": 8.214285714285714e-06, + "loss": 2.0022, + "step": 237 + }, + { + "epoch": 0.6002522068095839, + "grad_norm": 30.074064254760742, + "learning_rate": 8.16326530612245e-06, + "loss": 1.7747, + "step": 238 + }, + { + "epoch": 0.6027742749054225, + "grad_norm": 23.73440170288086, + "learning_rate": 8.112244897959184e-06, + "loss": 1.8468, + "step": 239 + }, + { + "epoch": 0.605296343001261, + "grad_norm": 22.338869094848633, + "learning_rate": 8.06122448979592e-06, + "loss": 1.8611, + "step": 240 + }, + { + "epoch": 0.6078184110970997, + "grad_norm": 24.844266891479492, + "learning_rate": 8.010204081632654e-06, + "loss": 1.9285, + "step": 241 + }, + { + "epoch": 0.6103404791929382, + "grad_norm": 29.65668487548828, + "learning_rate": 7.959183673469388e-06, + "loss": 1.935, + "step": 242 + }, + { + "epoch": 0.6128625472887768, + "grad_norm": 26.01723289489746, + "learning_rate": 7.908163265306124e-06, + "loss": 1.7587, + "step": 243 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 27.04817771911621, + "learning_rate": 7.857142857142858e-06, + "loss": 1.8878, + "step": 244 + }, + { + "epoch": 0.617906683480454, + "grad_norm": 36.23786163330078, + "learning_rate": 7.806122448979593e-06, + "loss": 1.992, + "step": 245 + }, + { + "epoch": 0.6204287515762925, + "grad_norm": 19.283294677734375, + "learning_rate": 7.755102040816327e-06, + "loss": 1.8066, + "step": 246 + }, + { + "epoch": 0.6229508196721312, + "grad_norm": 24.24143409729004, + "learning_rate": 7.704081632653061e-06, + "loss": 1.8899, + "step": 247 + }, + { + "epoch": 0.6254728877679697, + "grad_norm": 25.59832763671875, + "learning_rate": 7.653061224489796e-06, + "loss": 1.9601, + "step": 248 + }, + { + "epoch": 0.6279949558638083, + "grad_norm": 27.195640563964844, + "learning_rate": 7.602040816326531e-06, + "loss": 1.9561, + "step": 249 + }, + { + "epoch": 0.6305170239596469, + "grad_norm": 27.854570388793945, + "learning_rate": 7.551020408163265e-06, + "loss": 1.8781, + "step": 250 + }, + { + "epoch": 0.6330390920554855, + "grad_norm": 25.715761184692383, + "learning_rate": 7.500000000000001e-06, + "loss": 1.8542, + "step": 251 + }, + { + "epoch": 0.6355611601513241, + "grad_norm": 22.562984466552734, + "learning_rate": 7.448979591836736e-06, + "loss": 1.7681, + "step": 252 + }, + { + "epoch": 0.6380832282471627, + "grad_norm": 20.540348052978516, + "learning_rate": 7.39795918367347e-06, + "loss": 1.9617, + "step": 253 + }, + { + "epoch": 0.6406052963430012, + "grad_norm": 24.610937118530273, + "learning_rate": 7.346938775510205e-06, + "loss": 1.9694, + "step": 254 + }, + { + "epoch": 0.6431273644388399, + "grad_norm": 27.93538475036621, + "learning_rate": 7.295918367346939e-06, + "loss": 1.8858, + "step": 255 + }, + { + "epoch": 0.6456494325346784, + "grad_norm": 31.466445922851562, + "learning_rate": 7.244897959183675e-06, + "loss": 1.8252, + "step": 256 + }, + { + "epoch": 0.648171500630517, + "grad_norm": 26.276226043701172, + "learning_rate": 7.193877551020409e-06, + "loss": 1.8865, + "step": 257 + }, + { + "epoch": 0.6506935687263556, + "grad_norm": 22.52095603942871, + "learning_rate": 7.1428571428571436e-06, + "loss": 1.7649, + "step": 258 + }, + { + "epoch": 0.6532156368221942, + "grad_norm": 20.15144157409668, + "learning_rate": 7.091836734693878e-06, + "loss": 2.0158, + "step": 259 + }, + { + "epoch": 0.6557377049180327, + "grad_norm": 26.405349731445312, + "learning_rate": 7.0408163265306125e-06, + "loss": 1.8932, + "step": 260 + }, + { + "epoch": 0.6582597730138714, + "grad_norm": 32.94384765625, + "learning_rate": 6.989795918367348e-06, + "loss": 1.7795, + "step": 261 + }, + { + "epoch": 0.6607818411097099, + "grad_norm": 23.109092712402344, + "learning_rate": 6.938775510204082e-06, + "loss": 1.8383, + "step": 262 + }, + { + "epoch": 0.6633039092055486, + "grad_norm": 21.75737190246582, + "learning_rate": 6.887755102040817e-06, + "loss": 1.8727, + "step": 263 + }, + { + "epoch": 0.6658259773013872, + "grad_norm": 22.96916389465332, + "learning_rate": 6.836734693877551e-06, + "loss": 1.8544, + "step": 264 + }, + { + "epoch": 0.6683480453972257, + "grad_norm": 25.62445640563965, + "learning_rate": 6.785714285714287e-06, + "loss": 1.7503, + "step": 265 + }, + { + "epoch": 0.6708701134930644, + "grad_norm": 25.430530548095703, + "learning_rate": 6.734693877551021e-06, + "loss": 1.7938, + "step": 266 + }, + { + "epoch": 0.6733921815889029, + "grad_norm": 26.462881088256836, + "learning_rate": 6.683673469387756e-06, + "loss": 1.8284, + "step": 267 + }, + { + "epoch": 0.6759142496847415, + "grad_norm": 31.45004653930664, + "learning_rate": 6.63265306122449e-06, + "loss": 2.0328, + "step": 268 + }, + { + "epoch": 0.6784363177805801, + "grad_norm": 30.525737762451172, + "learning_rate": 6.581632653061225e-06, + "loss": 1.8192, + "step": 269 + }, + { + "epoch": 0.6809583858764187, + "grad_norm": 25.705707550048828, + "learning_rate": 6.530612244897959e-06, + "loss": 1.8533, + "step": 270 + }, + { + "epoch": 0.6834804539722572, + "grad_norm": 39.90187454223633, + "learning_rate": 6.4795918367346946e-06, + "loss": 1.9483, + "step": 271 + }, + { + "epoch": 0.6860025220680959, + "grad_norm": 28.0180721282959, + "learning_rate": 6.4285714285714295e-06, + "loss": 1.8132, + "step": 272 + }, + { + "epoch": 0.6885245901639344, + "grad_norm": 34.821372985839844, + "learning_rate": 6.3775510204081635e-06, + "loss": 1.9599, + "step": 273 + }, + { + "epoch": 0.691046658259773, + "grad_norm": 24.018394470214844, + "learning_rate": 6.326530612244899e-06, + "loss": 1.9248, + "step": 274 + }, + { + "epoch": 0.6935687263556116, + "grad_norm": 24.074344635009766, + "learning_rate": 6.275510204081633e-06, + "loss": 2.0148, + "step": 275 + }, + { + "epoch": 0.6960907944514502, + "grad_norm": 31.1939754486084, + "learning_rate": 6.224489795918368e-06, + "loss": 1.8959, + "step": 276 + }, + { + "epoch": 0.6986128625472888, + "grad_norm": 25.481502532958984, + "learning_rate": 6.173469387755102e-06, + "loss": 1.9832, + "step": 277 + }, + { + "epoch": 0.7011349306431274, + "grad_norm": 29.6664981842041, + "learning_rate": 6.122448979591837e-06, + "loss": 1.9222, + "step": 278 + }, + { + "epoch": 0.7036569987389659, + "grad_norm": 26.30698585510254, + "learning_rate": 6.071428571428571e-06, + "loss": 1.9897, + "step": 279 + }, + { + "epoch": 0.7061790668348046, + "grad_norm": 31.827558517456055, + "learning_rate": 6.020408163265307e-06, + "loss": 1.8615, + "step": 280 + }, + { + "epoch": 0.7087011349306431, + "grad_norm": 24.80223846435547, + "learning_rate": 5.969387755102042e-06, + "loss": 1.9579, + "step": 281 + }, + { + "epoch": 0.7112232030264817, + "grad_norm": 36.134700775146484, + "learning_rate": 5.918367346938776e-06, + "loss": 1.723, + "step": 282 + }, + { + "epoch": 0.7137452711223203, + "grad_norm": 30.388233184814453, + "learning_rate": 5.867346938775511e-06, + "loss": 1.9736, + "step": 283 + }, + { + "epoch": 0.7162673392181589, + "grad_norm": 32.231563568115234, + "learning_rate": 5.816326530612246e-06, + "loss": 1.9228, + "step": 284 + }, + { + "epoch": 0.7187894073139974, + "grad_norm": 38.05869674682617, + "learning_rate": 5.7653061224489805e-06, + "loss": 1.9159, + "step": 285 + }, + { + "epoch": 0.7213114754098361, + "grad_norm": 27.256147384643555, + "learning_rate": 5.7142857142857145e-06, + "loss": 1.8072, + "step": 286 + }, + { + "epoch": 0.7238335435056746, + "grad_norm": 25.67181396484375, + "learning_rate": 5.663265306122449e-06, + "loss": 1.8633, + "step": 287 + }, + { + "epoch": 0.7263556116015133, + "grad_norm": 31.8681697845459, + "learning_rate": 5.6122448979591834e-06, + "loss": 1.9822, + "step": 288 + }, + { + "epoch": 0.7288776796973518, + "grad_norm": 32.85325241088867, + "learning_rate": 5.561224489795919e-06, + "loss": 1.8166, + "step": 289 + }, + { + "epoch": 0.7313997477931904, + "grad_norm": 35.64312744140625, + "learning_rate": 5.510204081632653e-06, + "loss": 1.7166, + "step": 290 + }, + { + "epoch": 0.733921815889029, + "grad_norm": 24.276235580444336, + "learning_rate": 5.459183673469388e-06, + "loss": 1.7593, + "step": 291 + }, + { + "epoch": 0.7364438839848676, + "grad_norm": 29.371950149536133, + "learning_rate": 5.408163265306123e-06, + "loss": 1.8124, + "step": 292 + }, + { + "epoch": 0.7389659520807061, + "grad_norm": 23.76220703125, + "learning_rate": 5.357142857142857e-06, + "loss": 1.7775, + "step": 293 + }, + { + "epoch": 0.7414880201765448, + "grad_norm": 37.103050231933594, + "learning_rate": 5.306122448979593e-06, + "loss": 1.9253, + "step": 294 + }, + { + "epoch": 0.7440100882723834, + "grad_norm": 20.0811767578125, + "learning_rate": 5.255102040816327e-06, + "loss": 1.8711, + "step": 295 + }, + { + "epoch": 0.7465321563682219, + "grad_norm": 35.33123016357422, + "learning_rate": 5.204081632653062e-06, + "loss": 1.8764, + "step": 296 + }, + { + "epoch": 0.7490542244640606, + "grad_norm": 31.880672454833984, + "learning_rate": 5.153061224489796e-06, + "loss": 1.8929, + "step": 297 + }, + { + "epoch": 0.7515762925598991, + "grad_norm": 21.682334899902344, + "learning_rate": 5.1020408163265315e-06, + "loss": 2.0377, + "step": 298 + }, + { + "epoch": 0.7540983606557377, + "grad_norm": 34.68608474731445, + "learning_rate": 5.0510204081632655e-06, + "loss": 1.9341, + "step": 299 + }, + { + "epoch": 0.7566204287515763, + "grad_norm": 25.59632110595703, + "learning_rate": 5e-06, + "loss": 1.8264, + "step": 300 + }, + { + "epoch": 0.7566204287515763, + "eval_loss": 1.8502724170684814, + "eval_runtime": 6.6208, + "eval_samples_per_second": 106.483, + "eval_steps_per_second": 53.317, + "step": 300 + }, + { + "epoch": 0.7591424968474149, + "grad_norm": 33.780616760253906, + "learning_rate": 4.948979591836735e-06, + "loss": 1.8315, + "step": 301 + }, + { + "epoch": 0.7616645649432535, + "grad_norm": 23.005069732666016, + "learning_rate": 4.897959183673469e-06, + "loss": 1.8434, + "step": 302 + }, + { + "epoch": 0.7641866330390921, + "grad_norm": 27.338787078857422, + "learning_rate": 4.846938775510204e-06, + "loss": 1.9102, + "step": 303 + }, + { + "epoch": 0.7667087011349306, + "grad_norm": 26.87493133544922, + "learning_rate": 4.795918367346939e-06, + "loss": 1.8408, + "step": 304 + }, + { + "epoch": 0.7692307692307693, + "grad_norm": 33.59117126464844, + "learning_rate": 4.744897959183674e-06, + "loss": 1.8633, + "step": 305 + }, + { + "epoch": 0.7717528373266078, + "grad_norm": 38.98092269897461, + "learning_rate": 4.693877551020409e-06, + "loss": 1.8187, + "step": 306 + }, + { + "epoch": 0.7742749054224464, + "grad_norm": 28.7203369140625, + "learning_rate": 4.642857142857144e-06, + "loss": 1.8425, + "step": 307 + }, + { + "epoch": 0.776796973518285, + "grad_norm": 30.91414451599121, + "learning_rate": 4.591836734693878e-06, + "loss": 1.8526, + "step": 308 + }, + { + "epoch": 0.7793190416141236, + "grad_norm": 29.04154396057129, + "learning_rate": 4.540816326530613e-06, + "loss": 1.8913, + "step": 309 + }, + { + "epoch": 0.7818411097099621, + "grad_norm": 29.638099670410156, + "learning_rate": 4.489795918367348e-06, + "loss": 1.8736, + "step": 310 + }, + { + "epoch": 0.7843631778058008, + "grad_norm": 21.630523681640625, + "learning_rate": 4.438775510204082e-06, + "loss": 1.7407, + "step": 311 + }, + { + "epoch": 0.7868852459016393, + "grad_norm": 28.39365577697754, + "learning_rate": 4.3877551020408165e-06, + "loss": 1.7503, + "step": 312 + }, + { + "epoch": 0.7894073139974779, + "grad_norm": 30.86245346069336, + "learning_rate": 4.336734693877551e-06, + "loss": 1.885, + "step": 313 + }, + { + "epoch": 0.7919293820933165, + "grad_norm": 20.29497718811035, + "learning_rate": 4.2857142857142855e-06, + "loss": 1.8725, + "step": 314 + }, + { + "epoch": 0.7944514501891551, + "grad_norm": 20.04092025756836, + "learning_rate": 4.234693877551021e-06, + "loss": 1.949, + "step": 315 + }, + { + "epoch": 0.7969735182849937, + "grad_norm": 26.88593101501465, + "learning_rate": 4.183673469387755e-06, + "loss": 1.8807, + "step": 316 + }, + { + "epoch": 0.7994955863808323, + "grad_norm": 28.06767463684082, + "learning_rate": 4.13265306122449e-06, + "loss": 1.9135, + "step": 317 + }, + { + "epoch": 0.8020176544766708, + "grad_norm": 35.69569778442383, + "learning_rate": 4.081632653061225e-06, + "loss": 1.8257, + "step": 318 + }, + { + "epoch": 0.8045397225725095, + "grad_norm": 29.278770446777344, + "learning_rate": 4.03061224489796e-06, + "loss": 1.8459, + "step": 319 + }, + { + "epoch": 0.807061790668348, + "grad_norm": 28.043975830078125, + "learning_rate": 3.979591836734694e-06, + "loss": 1.8821, + "step": 320 + }, + { + "epoch": 0.8095838587641866, + "grad_norm": 24.060317993164062, + "learning_rate": 3.928571428571429e-06, + "loss": 1.853, + "step": 321 + }, + { + "epoch": 0.8121059268600253, + "grad_norm": 23.929243087768555, + "learning_rate": 3.877551020408164e-06, + "loss": 1.9249, + "step": 322 + }, + { + "epoch": 0.8146279949558638, + "grad_norm": 37.94782257080078, + "learning_rate": 3.826530612244898e-06, + "loss": 1.9666, + "step": 323 + }, + { + "epoch": 0.8171500630517023, + "grad_norm": 26.99836540222168, + "learning_rate": 3.7755102040816327e-06, + "loss": 1.8464, + "step": 324 + }, + { + "epoch": 0.819672131147541, + "grad_norm": 26.87177085876465, + "learning_rate": 3.724489795918368e-06, + "loss": 1.9178, + "step": 325 + }, + { + "epoch": 0.8221941992433796, + "grad_norm": 36.669212341308594, + "learning_rate": 3.6734693877551024e-06, + "loss": 1.8469, + "step": 326 + }, + { + "epoch": 0.8247162673392182, + "grad_norm": 26.681806564331055, + "learning_rate": 3.6224489795918373e-06, + "loss": 1.9433, + "step": 327 + }, + { + "epoch": 0.8272383354350568, + "grad_norm": 27.62560272216797, + "learning_rate": 3.5714285714285718e-06, + "loss": 1.8022, + "step": 328 + }, + { + "epoch": 0.8297604035308953, + "grad_norm": 31.047653198242188, + "learning_rate": 3.5204081632653062e-06, + "loss": 1.9952, + "step": 329 + }, + { + "epoch": 0.832282471626734, + "grad_norm": 22.605276107788086, + "learning_rate": 3.469387755102041e-06, + "loss": 1.8992, + "step": 330 + }, + { + "epoch": 0.8348045397225725, + "grad_norm": 27.210397720336914, + "learning_rate": 3.4183673469387756e-06, + "loss": 2.0905, + "step": 331 + }, + { + "epoch": 0.8373266078184111, + "grad_norm": 19.93979835510254, + "learning_rate": 3.3673469387755105e-06, + "loss": 1.8109, + "step": 332 + }, + { + "epoch": 0.8398486759142497, + "grad_norm": 28.999895095825195, + "learning_rate": 3.316326530612245e-06, + "loss": 1.8978, + "step": 333 + }, + { + "epoch": 0.8423707440100883, + "grad_norm": 28.620771408081055, + "learning_rate": 3.2653061224489794e-06, + "loss": 2.0414, + "step": 334 + }, + { + "epoch": 0.8448928121059268, + "grad_norm": 28.657215118408203, + "learning_rate": 3.2142857142857147e-06, + "loss": 1.8445, + "step": 335 + }, + { + "epoch": 0.8474148802017655, + "grad_norm": 33.65087127685547, + "learning_rate": 3.1632653061224496e-06, + "loss": 1.8559, + "step": 336 + }, + { + "epoch": 0.849936948297604, + "grad_norm": 28.926881790161133, + "learning_rate": 3.112244897959184e-06, + "loss": 1.9608, + "step": 337 + }, + { + "epoch": 0.8524590163934426, + "grad_norm": 24.56757926940918, + "learning_rate": 3.0612244897959185e-06, + "loss": 1.8948, + "step": 338 + }, + { + "epoch": 0.8549810844892812, + "grad_norm": 26.79437828063965, + "learning_rate": 3.0102040816326534e-06, + "loss": 1.7803, + "step": 339 + }, + { + "epoch": 0.8575031525851198, + "grad_norm": 22.516748428344727, + "learning_rate": 2.959183673469388e-06, + "loss": 1.9039, + "step": 340 + }, + { + "epoch": 0.8600252206809584, + "grad_norm": 28.779983520507812, + "learning_rate": 2.908163265306123e-06, + "loss": 1.9083, + "step": 341 + }, + { + "epoch": 0.862547288776797, + "grad_norm": 23.90164566040039, + "learning_rate": 2.8571428571428573e-06, + "loss": 1.7973, + "step": 342 + }, + { + "epoch": 0.8650693568726355, + "grad_norm": 25.098011016845703, + "learning_rate": 2.8061224489795917e-06, + "loss": 1.8596, + "step": 343 + }, + { + "epoch": 0.8675914249684742, + "grad_norm": 36.381954193115234, + "learning_rate": 2.7551020408163266e-06, + "loss": 1.8069, + "step": 344 + }, + { + "epoch": 0.8701134930643127, + "grad_norm": 33.54197311401367, + "learning_rate": 2.7040816326530615e-06, + "loss": 1.9138, + "step": 345 + }, + { + "epoch": 0.8726355611601513, + "grad_norm": 27.836380004882812, + "learning_rate": 2.6530612244897964e-06, + "loss": 1.8806, + "step": 346 + }, + { + "epoch": 0.8751576292559899, + "grad_norm": 29.386459350585938, + "learning_rate": 2.602040816326531e-06, + "loss": 1.9654, + "step": 347 + }, + { + "epoch": 0.8776796973518285, + "grad_norm": 25.446636199951172, + "learning_rate": 2.5510204081632657e-06, + "loss": 1.8723, + "step": 348 + }, + { + "epoch": 0.880201765447667, + "grad_norm": 23.149751663208008, + "learning_rate": 2.5e-06, + "loss": 1.7802, + "step": 349 + }, + { + "epoch": 0.8827238335435057, + "grad_norm": 24.944000244140625, + "learning_rate": 2.4489795918367347e-06, + "loss": 1.9328, + "step": 350 + }, + { + "epoch": 0.8852459016393442, + "grad_norm": 24.133886337280273, + "learning_rate": 2.3979591836734696e-06, + "loss": 1.8824, + "step": 351 + }, + { + "epoch": 0.8877679697351829, + "grad_norm": 29.368667602539062, + "learning_rate": 2.3469387755102044e-06, + "loss": 1.8283, + "step": 352 + }, + { + "epoch": 0.8902900378310215, + "grad_norm": 27.814085006713867, + "learning_rate": 2.295918367346939e-06, + "loss": 1.8401, + "step": 353 + }, + { + "epoch": 0.89281210592686, + "grad_norm": 36.647239685058594, + "learning_rate": 2.244897959183674e-06, + "loss": 1.9979, + "step": 354 + }, + { + "epoch": 0.8953341740226987, + "grad_norm": 30.776464462280273, + "learning_rate": 2.1938775510204083e-06, + "loss": 1.971, + "step": 355 + }, + { + "epoch": 0.8978562421185372, + "grad_norm": 21.568681716918945, + "learning_rate": 2.1428571428571427e-06, + "loss": 2.0157, + "step": 356 + }, + { + "epoch": 0.9003783102143758, + "grad_norm": 30.973060607910156, + "learning_rate": 2.0918367346938776e-06, + "loss": 1.9058, + "step": 357 + }, + { + "epoch": 0.9029003783102144, + "grad_norm": 25.841272354125977, + "learning_rate": 2.0408163265306125e-06, + "loss": 1.8946, + "step": 358 + }, + { + "epoch": 0.905422446406053, + "grad_norm": 23.440290451049805, + "learning_rate": 1.989795918367347e-06, + "loss": 1.9487, + "step": 359 + }, + { + "epoch": 0.9079445145018915, + "grad_norm": 25.26444435119629, + "learning_rate": 1.938775510204082e-06, + "loss": 1.7457, + "step": 360 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5260194244214784.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-360/training_args.bin b/checkpoints/checkpoint-360/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..866f8d7e39db21daf30a11de7c14a3ccb8f2e9aa --- /dev/null +++ b/checkpoints/checkpoint-360/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:591380c2089627b1aa728bfd52abdb50afeaafc7a7f48353dede4e443fc370d0 +size 5969 diff --git a/checkpoints/checkpoint-370/README.md b/checkpoints/checkpoint-370/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b974a935220f493e52a3da346206a652479b4735 --- /dev/null +++ b/checkpoints/checkpoint-370/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.2-1B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-370/adapter_config.json b/checkpoints/checkpoint-370/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a0d31afa2216e2d6d2237fc15eef0c8c03ca674 --- /dev/null +++ b/checkpoints/checkpoint-370/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.2-1B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-370/adapter_model.safetensors b/checkpoints/checkpoint-370/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..94ed420a0c174b6e97e1cae026c0c5d9341a0c33 --- /dev/null +++ b/checkpoints/checkpoint-370/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e48f431a08ba98fa07e44447c41cd2fbe09a907ed37d16844ce19dd63d029afc +size 41248 diff --git a/checkpoints/checkpoint-370/chat_template.jinja b/checkpoints/checkpoint-370/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..1bad6a0f648dccdbec523ca79ba90fbcfc806af0 --- /dev/null +++ b/checkpoints/checkpoint-370/chat_template.jinja @@ -0,0 +1,93 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- if strftime_now is defined %} + {%- set date_string = strftime_now("%d %b %Y") %} + {%- else %} + {%- set date_string = "26 Jul 2024" %} + {%- endif %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {{- "<|eot_id|>" }} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-370/optimizer.pt b/checkpoints/checkpoint-370/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..48b6643118a524c97e02a79aadf6e018ae1bcf68 --- /dev/null +++ b/checkpoints/checkpoint-370/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac66ffe80fd639fa3fa58b9813eaf3b57fa539dac1b92f53a0adcfc3fab87575 +size 38953 diff --git a/checkpoints/checkpoint-370/rng_state.pth b/checkpoints/checkpoint-370/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c9d202f3cabb995444284e316dca48902b67f89a --- /dev/null +++ b/checkpoints/checkpoint-370/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f646d2c44059721e44eb93fd2af0841401f183d6ff9b2e067649f1f58cce0f09 +size 14581 diff --git a/checkpoints/checkpoint-370/scheduler.pt b/checkpoints/checkpoint-370/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..03db78c3aba9ac99403d13298775163b1fd86f7c --- /dev/null +++ b/checkpoints/checkpoint-370/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdafef646b64e17c9b8995204033f230f5834543ec381bb979bef6a8b3204f47 +size 1465 diff --git a/checkpoints/checkpoint-370/special_tokens_map.json b/checkpoints/checkpoint-370/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-370/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-370/tokenizer.json b/checkpoints/checkpoint-370/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-370/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-370/tokenizer_config.json b/checkpoints/checkpoint-370/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-370/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-370/trainer_state.json b/checkpoints/checkpoint-370/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..8975c691fc3e4c0f3c3a0a27a416ca5221b66be1 --- /dev/null +++ b/checkpoints/checkpoint-370/trainer_state.json @@ -0,0 +1,2648 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9331651954602774, + "eval_steps": 100, + "global_step": 370, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 25.350475311279297, + "learning_rate": 0.0, + "loss": 2.5568, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 24.538068771362305, + "learning_rate": 4.000000000000001e-06, + "loss": 2.7748, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 23.780784606933594, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5911, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 24.780380249023438, + "learning_rate": 1.2e-05, + "loss": 2.8427, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 22.699949264526367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.709, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 22.106008529663086, + "learning_rate": 2e-05, + "loss": 2.5854, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 22.497045516967773, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.5427, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 27.103275299072266, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.6599, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 21.081985473632812, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.5145, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 25.964981079101562, + "learning_rate": 1.979591836734694e-05, + "loss": 2.4247, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 25.353195190429688, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.5092, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 18.94191551208496, + "learning_rate": 1.969387755102041e-05, + "loss": 2.4335, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 23.60140037536621, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.544, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 24.298965454101562, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.4987, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.745506286621094, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.4985, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 22.54330062866211, + "learning_rate": 1.948979591836735e-05, + "loss": 2.6892, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 21.46229362487793, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.3998, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 20.54530143737793, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.4244, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 18.8839111328125, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.3911, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.924652099609375, + "learning_rate": 1.928571428571429e-05, + "loss": 2.3588, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.996627807617188, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.3727, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 18.584613800048828, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2974, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 14.309200286865234, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.4843, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.074164390563965, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.4043, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 13.610542297363281, + "learning_rate": 1.903061224489796e-05, + "loss": 2.3762, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 15.666613578796387, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.3249, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 14.475164413452148, + "learning_rate": 1.892857142857143e-05, + "loss": 2.3317, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 16.231687545776367, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.5064, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 16.8968563079834, + "learning_rate": 1.88265306122449e-05, + "loss": 2.3932, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 17.74305534362793, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.3329, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 16.41620445251465, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.3982, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 17.965959548950195, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.4227, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 19.92589569091797, + "learning_rate": 1.862244897959184e-05, + "loss": 2.5255, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 20.62932586669922, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.1816, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 18.360614776611328, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.2827, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 19.199546813964844, + "learning_rate": 1.8469387755102043e-05, + "loss": 2.1498, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 22.727521896362305, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.3811, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 19.80649757385254, + "learning_rate": 1.836734693877551e-05, + "loss": 2.2342, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.24563217163086, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.2287, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 25.384042739868164, + "learning_rate": 1.826530612244898e-05, + "loss": 2.1259, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 23.417089462280273, + "learning_rate": 1.8214285714285715e-05, + "loss": 2.0858, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 27.639497756958008, + "learning_rate": 1.816326530612245e-05, + "loss": 2.2243, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 27.390850067138672, + "learning_rate": 1.8112244897959187e-05, + "loss": 2.1314, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 27.956937789916992, + "learning_rate": 1.806122448979592e-05, + "loss": 2.1755, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 32.09632873535156, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.2365, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 33.84647750854492, + "learning_rate": 1.795918367346939e-05, + "loss": 2.1671, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 32.027130126953125, + "learning_rate": 1.7908163265306123e-05, + "loss": 2.09, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 35.423587799072266, + "learning_rate": 1.785714285714286e-05, + "loss": 2.2479, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 31.041240692138672, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9687, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 28.790103912353516, + "learning_rate": 1.7755102040816327e-05, + "loss": 2.1428, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.089313507080078, + "learning_rate": 1.7704081632653062e-05, + "loss": 2.0673, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 26.493867874145508, + "learning_rate": 1.7653061224489798e-05, + "loss": 2.0814, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 19.993173599243164, + "learning_rate": 1.760204081632653e-05, + "loss": 2.005, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 21.89765167236328, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.2262, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 23.22844123840332, + "learning_rate": 1.7500000000000002e-05, + "loss": 2.0208, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 15.864526748657227, + "learning_rate": 1.7448979591836738e-05, + "loss": 2.0153, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 21.451187133789062, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.1386, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 18.089811325073242, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.9517, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 24.029157638549805, + "learning_rate": 1.729591836734694e-05, + "loss": 1.9719, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 18.722776412963867, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0623, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 20.211933135986328, + "learning_rate": 1.719387755102041e-05, + "loss": 2.0081, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 17.61188507080078, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.8484, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 20.118955612182617, + "learning_rate": 1.7091836734693878e-05, + "loss": 2.0799, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 17.271841049194336, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.9832, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 19.521392822265625, + "learning_rate": 1.698979591836735e-05, + "loss": 1.9129, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 22.660900115966797, + "learning_rate": 1.6938775510204085e-05, + "loss": 2.118, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 17.332427978515625, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.9632, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 22.42765998840332, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.954, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 23.6208553314209, + "learning_rate": 1.678571428571429e-05, + "loss": 1.9917, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 19.78505516052246, + "learning_rate": 1.673469387755102e-05, + "loss": 1.7964, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 19.453041076660156, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.9587, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 24.731407165527344, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.9945, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 20.977611541748047, + "learning_rate": 1.6581632653061225e-05, + "loss": 2.0617, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 22.959585189819336, + "learning_rate": 1.653061224489796e-05, + "loss": 1.98, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 21.952653884887695, + "learning_rate": 1.6479591836734696e-05, + "loss": 2.1094, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 22.320383071899414, + "learning_rate": 1.642857142857143e-05, + "loss": 1.8418, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 24.375411987304688, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.8428, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 19.64323616027832, + "learning_rate": 1.63265306122449e-05, + "loss": 1.9194, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 22.459064483642578, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.6649, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 36.789764404296875, + "learning_rate": 1.6224489795918368e-05, + "loss": 2.0131, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 22.109119415283203, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.9603, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 19.196834564208984, + "learning_rate": 1.612244897959184e-05, + "loss": 2.0538, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 26.870800018310547, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.9168, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 35.190696716308594, + "learning_rate": 1.6020408163265308e-05, + "loss": 2.0149, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 19.963472366333008, + "learning_rate": 1.596938775510204e-05, + "loss": 1.7871, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 20.292407989501953, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.944, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 20.55329132080078, + "learning_rate": 1.586734693877551e-05, + "loss": 2.0175, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 17.27350616455078, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.9308, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 22.471134185791016, + "learning_rate": 1.576530612244898e-05, + "loss": 1.9221, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 25.098316192626953, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.9359, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 25.125213623046875, + "learning_rate": 1.566326530612245e-05, + "loss": 2.0087, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 20.038599014282227, + "learning_rate": 1.5612244897959187e-05, + "loss": 2.0939, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 19.016841888427734, + "learning_rate": 1.556122448979592e-05, + "loss": 2.0183, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 21.97820472717285, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.8239, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 25.578901290893555, + "learning_rate": 1.545918367346939e-05, + "loss": 1.9388, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 23.74614143371582, + "learning_rate": 1.5408163265306123e-05, + "loss": 2.0492, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 22.203304290771484, + "learning_rate": 1.535714285714286e-05, + "loss": 1.941, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 21.39324188232422, + "learning_rate": 1.530612244897959e-05, + "loss": 1.9042, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 18.99315643310547, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.88, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 24.22341537475586, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.8147, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.8966256380081177, + "eval_runtime": 6.9787, + "eval_samples_per_second": 101.022, + "eval_steps_per_second": 50.583, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 18.296152114868164, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.8605, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 26.404766082763672, + "learning_rate": 1.510204081632653e-05, + "loss": 2.1195, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 19.187122344970703, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.9284, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 20.79934310913086, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.725, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 23.833288192749023, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.9215, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 22.301727294921875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.8728, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 23.685596466064453, + "learning_rate": 1.4846938775510204e-05, + "loss": 2.0482, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 18.969186782836914, + "learning_rate": 1.479591836734694e-05, + "loss": 1.8724, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 23.994483947753906, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.9542, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 16.84621238708496, + "learning_rate": 1.469387755102041e-05, + "loss": 1.9703, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 23.411087036132812, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.9836, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 29.55487632751465, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.9124, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 32.28921127319336, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.8566, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 24.007558822631836, + "learning_rate": 1.448979591836735e-05, + "loss": 1.8296, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 26.753524780273438, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.7181, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 22.49270248413086, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.8741, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 28.006656646728516, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.9151, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 17.606775283813477, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.9654, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 29.94802474975586, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.8849, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 28.27743148803711, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.8006, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 19.11652183532715, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.8539, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 24.255807876586914, + "learning_rate": 1.4081632653061225e-05, + "loss": 2.0162, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 22.508352279663086, + "learning_rate": 1.403061224489796e-05, + "loss": 1.858, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 27.028772354125977, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.8175, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 22.697704315185547, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.9789, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 31.604068756103516, + "learning_rate": 1.3877551020408165e-05, + "loss": 2.0039, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 27.71053695678711, + "learning_rate": 1.38265306122449e-05, + "loss": 1.9867, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 17.37586784362793, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.6931, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 20.28536605834961, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.9199, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 29.4377384185791, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.9322, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 17.703046798706055, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.8842, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 30.14008140563965, + "learning_rate": 1.3571428571428574e-05, + "loss": 2.1228, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 29.657262802124023, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.8929, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 18.243854522705078, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.8811, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 33.22247314453125, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.9814, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 26.413856506347656, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.9329, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 20.56089210510254, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.8944, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 19.480737686157227, + "learning_rate": 1.326530612244898e-05, + "loss": 1.9221, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 22.788074493408203, + "learning_rate": 1.3214285714285716e-05, + "loss": 2.0445, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 20.722291946411133, + "learning_rate": 1.316326530612245e-05, + "loss": 1.9998, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 25.190189361572266, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.8076, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 23.203886032104492, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.7821, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 25.32374382019043, + "learning_rate": 1.3010204081632653e-05, + "loss": 2.0356, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 28.798864364624023, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.8202, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 24.93810272216797, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.9237, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 36.78353500366211, + "learning_rate": 1.2857142857142859e-05, + "loss": 2.0019, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 28.510663986206055, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.9268, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 38.19087219238281, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.9366, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 20.796728134155273, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.8731, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 23.036758422851562, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.8835, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 27.058195114135742, + "learning_rate": 1.260204081632653e-05, + "loss": 1.8013, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 25.390460968017578, + "learning_rate": 1.2551020408163267e-05, + "loss": 2.0623, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 27.993654251098633, + "learning_rate": 1.25e-05, + "loss": 1.6895, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 24.15807342529297, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.9799, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 24.369815826416016, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.9687, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 24.572988510131836, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.8607, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 20.491390228271484, + "learning_rate": 1.229591836734694e-05, + "loss": 2.0677, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 25.128101348876953, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.9233, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 18.843276977539062, + "learning_rate": 1.219387755102041e-05, + "loss": 1.781, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 24.99994659423828, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.962, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 20.679218292236328, + "learning_rate": 1.2091836734693878e-05, + "loss": 2.0055, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 26.00550651550293, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.9761, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 33.80900192260742, + "learning_rate": 1.1989795918367348e-05, + "loss": 2.0502, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 25.639009475708008, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.9088, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 17.48627471923828, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.9359, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 23.16074562072754, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.8647, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 25.39946174621582, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.8523, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 25.8050537109375, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.8403, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 20.019033432006836, + "learning_rate": 1.1683673469387755e-05, + "loss": 2.0023, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 26.194847106933594, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.9429, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 21.064212799072266, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.8302, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 21.876129150390625, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.8881, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 33.61103439331055, + "learning_rate": 1.1479591836734697e-05, + "loss": 2.0497, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 27.204744338989258, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.8431, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 21.605751037597656, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.9149, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 30.307472229003906, + "learning_rate": 1.1326530612244899e-05, + "loss": 2.0313, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 23.69244384765625, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.8676, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 25.619901657104492, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.7905, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 28.0296573638916, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.8567, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 36.4359130859375, + "learning_rate": 1.1122448979591838e-05, + "loss": 2.115, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 26.91726303100586, + "learning_rate": 1.1071428571428572e-05, + "loss": 2.0642, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 23.085880279541016, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.9109, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 27.870641708374023, + "learning_rate": 1.096938775510204e-05, + "loss": 1.8999, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 32.0672607421875, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.904, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 28.879365921020508, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.7159, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 27.592771530151367, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.8561, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 27.412763595581055, + "learning_rate": 1.076530612244898e-05, + "loss": 1.9282, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 30.12356185913086, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.8726, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 39.9027214050293, + "learning_rate": 1.066326530612245e-05, + "loss": 1.7551, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 30.483945846557617, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.7643, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 26.00415802001953, + "learning_rate": 1.0561224489795918e-05, + "loss": 2.0552, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 23.03282356262207, + "learning_rate": 1.0510204081632654e-05, + "loss": 2.0052, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 33.653221130371094, + "learning_rate": 1.045918367346939e-05, + "loss": 1.7367, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 39.59351348876953, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.9726, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.77714920043945, + "learning_rate": 1.0357142857142859e-05, + "loss": 2.0906, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 33.194549560546875, + "learning_rate": 1.0306122448979591e-05, + "loss": 2.0742, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 25.10793685913086, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.9204, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 40.048404693603516, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.7775, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 26.085933685302734, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.9459, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 18.375, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.9536, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.8626214265823364, + "eval_runtime": 6.6508, + "eval_samples_per_second": 106.002, + "eval_steps_per_second": 53.076, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 33.858341217041016, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.8191, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 22.895992279052734, + "learning_rate": 1e-05, + "loss": 2.0596, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 30.55072593688965, + "learning_rate": 9.948979591836737e-06, + "loss": 1.8904, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 26.542705535888672, + "learning_rate": 9.89795918367347e-06, + "loss": 1.9683, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 39.81034851074219, + "learning_rate": 9.846938775510205e-06, + "loss": 1.9726, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 22.0065860748291, + "learning_rate": 9.795918367346939e-06, + "loss": 2.0575, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 19.012041091918945, + "learning_rate": 9.744897959183674e-06, + "loss": 1.8431, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 39.699974060058594, + "learning_rate": 9.693877551020408e-06, + "loss": 1.8231, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 21.391319274902344, + "learning_rate": 9.642857142857144e-06, + "loss": 1.7939, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 25.8063907623291, + "learning_rate": 9.591836734693878e-06, + "loss": 2.0366, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 20.598569869995117, + "learning_rate": 9.540816326530612e-06, + "loss": 1.8323, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 29.391401290893555, + "learning_rate": 9.489795918367348e-06, + "loss": 2.0052, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 24.39499855041504, + "learning_rate": 9.438775510204082e-06, + "loss": 1.8461, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 24.16887092590332, + "learning_rate": 9.387755102040818e-06, + "loss": 1.9404, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 24.577871322631836, + "learning_rate": 9.336734693877552e-06, + "loss": 1.9202, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 26.117361068725586, + "learning_rate": 9.285714285714288e-06, + "loss": 1.921, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 22.586837768554688, + "learning_rate": 9.234693877551022e-06, + "loss": 1.9692, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 18.438722610473633, + "learning_rate": 9.183673469387756e-06, + "loss": 1.9496, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 22.94545555114746, + "learning_rate": 9.13265306122449e-06, + "loss": 1.991, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 28.664562225341797, + "learning_rate": 9.081632653061225e-06, + "loss": 1.8352, + "step": 220 + }, + { + "epoch": 0.5573770491803278, + "grad_norm": 25.63576316833496, + "learning_rate": 9.03061224489796e-06, + "loss": 1.9399, + "step": 221 + }, + { + "epoch": 0.5598991172761665, + "grad_norm": 21.650251388549805, + "learning_rate": 8.979591836734695e-06, + "loss": 1.9565, + "step": 222 + }, + { + "epoch": 0.562421185372005, + "grad_norm": 29.605735778808594, + "learning_rate": 8.92857142857143e-06, + "loss": 1.729, + "step": 223 + }, + { + "epoch": 0.5649432534678437, + "grad_norm": 23.98230743408203, + "learning_rate": 8.877551020408163e-06, + "loss": 1.9399, + "step": 224 + }, + { + "epoch": 0.5674653215636822, + "grad_norm": 20.37510108947754, + "learning_rate": 8.826530612244899e-06, + "loss": 1.7322, + "step": 225 + }, + { + "epoch": 0.5699873896595208, + "grad_norm": 25.876188278198242, + "learning_rate": 8.775510204081633e-06, + "loss": 1.9444, + "step": 226 + }, + { + "epoch": 0.5725094577553594, + "grad_norm": 32.07249069213867, + "learning_rate": 8.724489795918369e-06, + "loss": 1.9364, + "step": 227 + }, + { + "epoch": 0.575031525851198, + "grad_norm": 28.014524459838867, + "learning_rate": 8.673469387755103e-06, + "loss": 1.757, + "step": 228 + }, + { + "epoch": 0.5775535939470365, + "grad_norm": 30.82647132873535, + "learning_rate": 8.622448979591837e-06, + "loss": 1.9067, + "step": 229 + }, + { + "epoch": 0.5800756620428752, + "grad_norm": 30.651660919189453, + "learning_rate": 8.571428571428571e-06, + "loss": 1.9906, + "step": 230 + }, + { + "epoch": 0.5825977301387137, + "grad_norm": 25.239904403686523, + "learning_rate": 8.520408163265307e-06, + "loss": 1.8914, + "step": 231 + }, + { + "epoch": 0.5851197982345523, + "grad_norm": 21.33747673034668, + "learning_rate": 8.469387755102042e-06, + "loss": 1.9999, + "step": 232 + }, + { + "epoch": 0.587641866330391, + "grad_norm": 25.255064010620117, + "learning_rate": 8.418367346938776e-06, + "loss": 1.8941, + "step": 233 + }, + { + "epoch": 0.5901639344262295, + "grad_norm": 24.443973541259766, + "learning_rate": 8.36734693877551e-06, + "loss": 1.7679, + "step": 234 + }, + { + "epoch": 0.592686002522068, + "grad_norm": 25.473894119262695, + "learning_rate": 8.316326530612246e-06, + "loss": 1.7876, + "step": 235 + }, + { + "epoch": 0.5952080706179067, + "grad_norm": 26.28467559814453, + "learning_rate": 8.26530612244898e-06, + "loss": 1.6761, + "step": 236 + }, + { + "epoch": 0.5977301387137453, + "grad_norm": 24.488052368164062, + "learning_rate": 8.214285714285714e-06, + "loss": 2.0022, + "step": 237 + }, + { + "epoch": 0.6002522068095839, + "grad_norm": 30.074064254760742, + "learning_rate": 8.16326530612245e-06, + "loss": 1.7747, + "step": 238 + }, + { + "epoch": 0.6027742749054225, + "grad_norm": 23.73440170288086, + "learning_rate": 8.112244897959184e-06, + "loss": 1.8468, + "step": 239 + }, + { + "epoch": 0.605296343001261, + "grad_norm": 22.338869094848633, + "learning_rate": 8.06122448979592e-06, + "loss": 1.8611, + "step": 240 + }, + { + "epoch": 0.6078184110970997, + "grad_norm": 24.844266891479492, + "learning_rate": 8.010204081632654e-06, + "loss": 1.9285, + "step": 241 + }, + { + "epoch": 0.6103404791929382, + "grad_norm": 29.65668487548828, + "learning_rate": 7.959183673469388e-06, + "loss": 1.935, + "step": 242 + }, + { + "epoch": 0.6128625472887768, + "grad_norm": 26.01723289489746, + "learning_rate": 7.908163265306124e-06, + "loss": 1.7587, + "step": 243 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 27.04817771911621, + "learning_rate": 7.857142857142858e-06, + "loss": 1.8878, + "step": 244 + }, + { + "epoch": 0.617906683480454, + "grad_norm": 36.23786163330078, + "learning_rate": 7.806122448979593e-06, + "loss": 1.992, + "step": 245 + }, + { + "epoch": 0.6204287515762925, + "grad_norm": 19.283294677734375, + "learning_rate": 7.755102040816327e-06, + "loss": 1.8066, + "step": 246 + }, + { + "epoch": 0.6229508196721312, + "grad_norm": 24.24143409729004, + "learning_rate": 7.704081632653061e-06, + "loss": 1.8899, + "step": 247 + }, + { + "epoch": 0.6254728877679697, + "grad_norm": 25.59832763671875, + "learning_rate": 7.653061224489796e-06, + "loss": 1.9601, + "step": 248 + }, + { + "epoch": 0.6279949558638083, + "grad_norm": 27.195640563964844, + "learning_rate": 7.602040816326531e-06, + "loss": 1.9561, + "step": 249 + }, + { + "epoch": 0.6305170239596469, + "grad_norm": 27.854570388793945, + "learning_rate": 7.551020408163265e-06, + "loss": 1.8781, + "step": 250 + }, + { + "epoch": 0.6330390920554855, + "grad_norm": 25.715761184692383, + "learning_rate": 7.500000000000001e-06, + "loss": 1.8542, + "step": 251 + }, + { + "epoch": 0.6355611601513241, + "grad_norm": 22.562984466552734, + "learning_rate": 7.448979591836736e-06, + "loss": 1.7681, + "step": 252 + }, + { + "epoch": 0.6380832282471627, + "grad_norm": 20.540348052978516, + "learning_rate": 7.39795918367347e-06, + "loss": 1.9617, + "step": 253 + }, + { + "epoch": 0.6406052963430012, + "grad_norm": 24.610937118530273, + "learning_rate": 7.346938775510205e-06, + "loss": 1.9694, + "step": 254 + }, + { + "epoch": 0.6431273644388399, + "grad_norm": 27.93538475036621, + "learning_rate": 7.295918367346939e-06, + "loss": 1.8858, + "step": 255 + }, + { + "epoch": 0.6456494325346784, + "grad_norm": 31.466445922851562, + "learning_rate": 7.244897959183675e-06, + "loss": 1.8252, + "step": 256 + }, + { + "epoch": 0.648171500630517, + "grad_norm": 26.276226043701172, + "learning_rate": 7.193877551020409e-06, + "loss": 1.8865, + "step": 257 + }, + { + "epoch": 0.6506935687263556, + "grad_norm": 22.52095603942871, + "learning_rate": 7.1428571428571436e-06, + "loss": 1.7649, + "step": 258 + }, + { + "epoch": 0.6532156368221942, + "grad_norm": 20.15144157409668, + "learning_rate": 7.091836734693878e-06, + "loss": 2.0158, + "step": 259 + }, + { + "epoch": 0.6557377049180327, + "grad_norm": 26.405349731445312, + "learning_rate": 7.0408163265306125e-06, + "loss": 1.8932, + "step": 260 + }, + { + "epoch": 0.6582597730138714, + "grad_norm": 32.94384765625, + "learning_rate": 6.989795918367348e-06, + "loss": 1.7795, + "step": 261 + }, + { + "epoch": 0.6607818411097099, + "grad_norm": 23.109092712402344, + "learning_rate": 6.938775510204082e-06, + "loss": 1.8383, + "step": 262 + }, + { + "epoch": 0.6633039092055486, + "grad_norm": 21.75737190246582, + "learning_rate": 6.887755102040817e-06, + "loss": 1.8727, + "step": 263 + }, + { + "epoch": 0.6658259773013872, + "grad_norm": 22.96916389465332, + "learning_rate": 6.836734693877551e-06, + "loss": 1.8544, + "step": 264 + }, + { + "epoch": 0.6683480453972257, + "grad_norm": 25.62445640563965, + "learning_rate": 6.785714285714287e-06, + "loss": 1.7503, + "step": 265 + }, + { + "epoch": 0.6708701134930644, + "grad_norm": 25.430530548095703, + "learning_rate": 6.734693877551021e-06, + "loss": 1.7938, + "step": 266 + }, + { + "epoch": 0.6733921815889029, + "grad_norm": 26.462881088256836, + "learning_rate": 6.683673469387756e-06, + "loss": 1.8284, + "step": 267 + }, + { + "epoch": 0.6759142496847415, + "grad_norm": 31.45004653930664, + "learning_rate": 6.63265306122449e-06, + "loss": 2.0328, + "step": 268 + }, + { + "epoch": 0.6784363177805801, + "grad_norm": 30.525737762451172, + "learning_rate": 6.581632653061225e-06, + "loss": 1.8192, + "step": 269 + }, + { + "epoch": 0.6809583858764187, + "grad_norm": 25.705707550048828, + "learning_rate": 6.530612244897959e-06, + "loss": 1.8533, + "step": 270 + }, + { + "epoch": 0.6834804539722572, + "grad_norm": 39.90187454223633, + "learning_rate": 6.4795918367346946e-06, + "loss": 1.9483, + "step": 271 + }, + { + "epoch": 0.6860025220680959, + "grad_norm": 28.0180721282959, + "learning_rate": 6.4285714285714295e-06, + "loss": 1.8132, + "step": 272 + }, + { + "epoch": 0.6885245901639344, + "grad_norm": 34.821372985839844, + "learning_rate": 6.3775510204081635e-06, + "loss": 1.9599, + "step": 273 + }, + { + "epoch": 0.691046658259773, + "grad_norm": 24.018394470214844, + "learning_rate": 6.326530612244899e-06, + "loss": 1.9248, + "step": 274 + }, + { + "epoch": 0.6935687263556116, + "grad_norm": 24.074344635009766, + "learning_rate": 6.275510204081633e-06, + "loss": 2.0148, + "step": 275 + }, + { + "epoch": 0.6960907944514502, + "grad_norm": 31.1939754486084, + "learning_rate": 6.224489795918368e-06, + "loss": 1.8959, + "step": 276 + }, + { + "epoch": 0.6986128625472888, + "grad_norm": 25.481502532958984, + "learning_rate": 6.173469387755102e-06, + "loss": 1.9832, + "step": 277 + }, + { + "epoch": 0.7011349306431274, + "grad_norm": 29.6664981842041, + "learning_rate": 6.122448979591837e-06, + "loss": 1.9222, + "step": 278 + }, + { + "epoch": 0.7036569987389659, + "grad_norm": 26.30698585510254, + "learning_rate": 6.071428571428571e-06, + "loss": 1.9897, + "step": 279 + }, + { + "epoch": 0.7061790668348046, + "grad_norm": 31.827558517456055, + "learning_rate": 6.020408163265307e-06, + "loss": 1.8615, + "step": 280 + }, + { + "epoch": 0.7087011349306431, + "grad_norm": 24.80223846435547, + "learning_rate": 5.969387755102042e-06, + "loss": 1.9579, + "step": 281 + }, + { + "epoch": 0.7112232030264817, + "grad_norm": 36.134700775146484, + "learning_rate": 5.918367346938776e-06, + "loss": 1.723, + "step": 282 + }, + { + "epoch": 0.7137452711223203, + "grad_norm": 30.388233184814453, + "learning_rate": 5.867346938775511e-06, + "loss": 1.9736, + "step": 283 + }, + { + "epoch": 0.7162673392181589, + "grad_norm": 32.231563568115234, + "learning_rate": 5.816326530612246e-06, + "loss": 1.9228, + "step": 284 + }, + { + "epoch": 0.7187894073139974, + "grad_norm": 38.05869674682617, + "learning_rate": 5.7653061224489805e-06, + "loss": 1.9159, + "step": 285 + }, + { + "epoch": 0.7213114754098361, + "grad_norm": 27.256147384643555, + "learning_rate": 5.7142857142857145e-06, + "loss": 1.8072, + "step": 286 + }, + { + "epoch": 0.7238335435056746, + "grad_norm": 25.67181396484375, + "learning_rate": 5.663265306122449e-06, + "loss": 1.8633, + "step": 287 + }, + { + "epoch": 0.7263556116015133, + "grad_norm": 31.8681697845459, + "learning_rate": 5.6122448979591834e-06, + "loss": 1.9822, + "step": 288 + }, + { + "epoch": 0.7288776796973518, + "grad_norm": 32.85325241088867, + "learning_rate": 5.561224489795919e-06, + "loss": 1.8166, + "step": 289 + }, + { + "epoch": 0.7313997477931904, + "grad_norm": 35.64312744140625, + "learning_rate": 5.510204081632653e-06, + "loss": 1.7166, + "step": 290 + }, + { + "epoch": 0.733921815889029, + "grad_norm": 24.276235580444336, + "learning_rate": 5.459183673469388e-06, + "loss": 1.7593, + "step": 291 + }, + { + "epoch": 0.7364438839848676, + "grad_norm": 29.371950149536133, + "learning_rate": 5.408163265306123e-06, + "loss": 1.8124, + "step": 292 + }, + { + "epoch": 0.7389659520807061, + "grad_norm": 23.76220703125, + "learning_rate": 5.357142857142857e-06, + "loss": 1.7775, + "step": 293 + }, + { + "epoch": 0.7414880201765448, + "grad_norm": 37.103050231933594, + "learning_rate": 5.306122448979593e-06, + "loss": 1.9253, + "step": 294 + }, + { + "epoch": 0.7440100882723834, + "grad_norm": 20.0811767578125, + "learning_rate": 5.255102040816327e-06, + "loss": 1.8711, + "step": 295 + }, + { + "epoch": 0.7465321563682219, + "grad_norm": 35.33123016357422, + "learning_rate": 5.204081632653062e-06, + "loss": 1.8764, + "step": 296 + }, + { + "epoch": 0.7490542244640606, + "grad_norm": 31.880672454833984, + "learning_rate": 5.153061224489796e-06, + "loss": 1.8929, + "step": 297 + }, + { + "epoch": 0.7515762925598991, + "grad_norm": 21.682334899902344, + "learning_rate": 5.1020408163265315e-06, + "loss": 2.0377, + "step": 298 + }, + { + "epoch": 0.7540983606557377, + "grad_norm": 34.68608474731445, + "learning_rate": 5.0510204081632655e-06, + "loss": 1.9341, + "step": 299 + }, + { + "epoch": 0.7566204287515763, + "grad_norm": 25.59632110595703, + "learning_rate": 5e-06, + "loss": 1.8264, + "step": 300 + }, + { + "epoch": 0.7566204287515763, + "eval_loss": 1.8502724170684814, + "eval_runtime": 6.6208, + "eval_samples_per_second": 106.483, + "eval_steps_per_second": 53.317, + "step": 300 + }, + { + "epoch": 0.7591424968474149, + "grad_norm": 33.780616760253906, + "learning_rate": 4.948979591836735e-06, + "loss": 1.8315, + "step": 301 + }, + { + "epoch": 0.7616645649432535, + "grad_norm": 23.005069732666016, + "learning_rate": 4.897959183673469e-06, + "loss": 1.8434, + "step": 302 + }, + { + "epoch": 0.7641866330390921, + "grad_norm": 27.338787078857422, + "learning_rate": 4.846938775510204e-06, + "loss": 1.9102, + "step": 303 + }, + { + "epoch": 0.7667087011349306, + "grad_norm": 26.87493133544922, + "learning_rate": 4.795918367346939e-06, + "loss": 1.8408, + "step": 304 + }, + { + "epoch": 0.7692307692307693, + "grad_norm": 33.59117126464844, + "learning_rate": 4.744897959183674e-06, + "loss": 1.8633, + "step": 305 + }, + { + "epoch": 0.7717528373266078, + "grad_norm": 38.98092269897461, + "learning_rate": 4.693877551020409e-06, + "loss": 1.8187, + "step": 306 + }, + { + "epoch": 0.7742749054224464, + "grad_norm": 28.7203369140625, + "learning_rate": 4.642857142857144e-06, + "loss": 1.8425, + "step": 307 + }, + { + "epoch": 0.776796973518285, + "grad_norm": 30.91414451599121, + "learning_rate": 4.591836734693878e-06, + "loss": 1.8526, + "step": 308 + }, + { + "epoch": 0.7793190416141236, + "grad_norm": 29.04154396057129, + "learning_rate": 4.540816326530613e-06, + "loss": 1.8913, + "step": 309 + }, + { + "epoch": 0.7818411097099621, + "grad_norm": 29.638099670410156, + "learning_rate": 4.489795918367348e-06, + "loss": 1.8736, + "step": 310 + }, + { + "epoch": 0.7843631778058008, + "grad_norm": 21.630523681640625, + "learning_rate": 4.438775510204082e-06, + "loss": 1.7407, + "step": 311 + }, + { + "epoch": 0.7868852459016393, + "grad_norm": 28.39365577697754, + "learning_rate": 4.3877551020408165e-06, + "loss": 1.7503, + "step": 312 + }, + { + "epoch": 0.7894073139974779, + "grad_norm": 30.86245346069336, + "learning_rate": 4.336734693877551e-06, + "loss": 1.885, + "step": 313 + }, + { + "epoch": 0.7919293820933165, + "grad_norm": 20.29497718811035, + "learning_rate": 4.2857142857142855e-06, + "loss": 1.8725, + "step": 314 + }, + { + "epoch": 0.7944514501891551, + "grad_norm": 20.04092025756836, + "learning_rate": 4.234693877551021e-06, + "loss": 1.949, + "step": 315 + }, + { + "epoch": 0.7969735182849937, + "grad_norm": 26.88593101501465, + "learning_rate": 4.183673469387755e-06, + "loss": 1.8807, + "step": 316 + }, + { + "epoch": 0.7994955863808323, + "grad_norm": 28.06767463684082, + "learning_rate": 4.13265306122449e-06, + "loss": 1.9135, + "step": 317 + }, + { + "epoch": 0.8020176544766708, + "grad_norm": 35.69569778442383, + "learning_rate": 4.081632653061225e-06, + "loss": 1.8257, + "step": 318 + }, + { + "epoch": 0.8045397225725095, + "grad_norm": 29.278770446777344, + "learning_rate": 4.03061224489796e-06, + "loss": 1.8459, + "step": 319 + }, + { + "epoch": 0.807061790668348, + "grad_norm": 28.043975830078125, + "learning_rate": 3.979591836734694e-06, + "loss": 1.8821, + "step": 320 + }, + { + "epoch": 0.8095838587641866, + "grad_norm": 24.060317993164062, + "learning_rate": 3.928571428571429e-06, + "loss": 1.853, + "step": 321 + }, + { + "epoch": 0.8121059268600253, + "grad_norm": 23.929243087768555, + "learning_rate": 3.877551020408164e-06, + "loss": 1.9249, + "step": 322 + }, + { + "epoch": 0.8146279949558638, + "grad_norm": 37.94782257080078, + "learning_rate": 3.826530612244898e-06, + "loss": 1.9666, + "step": 323 + }, + { + "epoch": 0.8171500630517023, + "grad_norm": 26.99836540222168, + "learning_rate": 3.7755102040816327e-06, + "loss": 1.8464, + "step": 324 + }, + { + "epoch": 0.819672131147541, + "grad_norm": 26.87177085876465, + "learning_rate": 3.724489795918368e-06, + "loss": 1.9178, + "step": 325 + }, + { + "epoch": 0.8221941992433796, + "grad_norm": 36.669212341308594, + "learning_rate": 3.6734693877551024e-06, + "loss": 1.8469, + "step": 326 + }, + { + "epoch": 0.8247162673392182, + "grad_norm": 26.681806564331055, + "learning_rate": 3.6224489795918373e-06, + "loss": 1.9433, + "step": 327 + }, + { + "epoch": 0.8272383354350568, + "grad_norm": 27.62560272216797, + "learning_rate": 3.5714285714285718e-06, + "loss": 1.8022, + "step": 328 + }, + { + "epoch": 0.8297604035308953, + "grad_norm": 31.047653198242188, + "learning_rate": 3.5204081632653062e-06, + "loss": 1.9952, + "step": 329 + }, + { + "epoch": 0.832282471626734, + "grad_norm": 22.605276107788086, + "learning_rate": 3.469387755102041e-06, + "loss": 1.8992, + "step": 330 + }, + { + "epoch": 0.8348045397225725, + "grad_norm": 27.210397720336914, + "learning_rate": 3.4183673469387756e-06, + "loss": 2.0905, + "step": 331 + }, + { + "epoch": 0.8373266078184111, + "grad_norm": 19.93979835510254, + "learning_rate": 3.3673469387755105e-06, + "loss": 1.8109, + "step": 332 + }, + { + "epoch": 0.8398486759142497, + "grad_norm": 28.999895095825195, + "learning_rate": 3.316326530612245e-06, + "loss": 1.8978, + "step": 333 + }, + { + "epoch": 0.8423707440100883, + "grad_norm": 28.620771408081055, + "learning_rate": 3.2653061224489794e-06, + "loss": 2.0414, + "step": 334 + }, + { + "epoch": 0.8448928121059268, + "grad_norm": 28.657215118408203, + "learning_rate": 3.2142857142857147e-06, + "loss": 1.8445, + "step": 335 + }, + { + "epoch": 0.8474148802017655, + "grad_norm": 33.65087127685547, + "learning_rate": 3.1632653061224496e-06, + "loss": 1.8559, + "step": 336 + }, + { + "epoch": 0.849936948297604, + "grad_norm": 28.926881790161133, + "learning_rate": 3.112244897959184e-06, + "loss": 1.9608, + "step": 337 + }, + { + "epoch": 0.8524590163934426, + "grad_norm": 24.56757926940918, + "learning_rate": 3.0612244897959185e-06, + "loss": 1.8948, + "step": 338 + }, + { + "epoch": 0.8549810844892812, + "grad_norm": 26.79437828063965, + "learning_rate": 3.0102040816326534e-06, + "loss": 1.7803, + "step": 339 + }, + { + "epoch": 0.8575031525851198, + "grad_norm": 22.516748428344727, + "learning_rate": 2.959183673469388e-06, + "loss": 1.9039, + "step": 340 + }, + { + "epoch": 0.8600252206809584, + "grad_norm": 28.779983520507812, + "learning_rate": 2.908163265306123e-06, + "loss": 1.9083, + "step": 341 + }, + { + "epoch": 0.862547288776797, + "grad_norm": 23.90164566040039, + "learning_rate": 2.8571428571428573e-06, + "loss": 1.7973, + "step": 342 + }, + { + "epoch": 0.8650693568726355, + "grad_norm": 25.098011016845703, + "learning_rate": 2.8061224489795917e-06, + "loss": 1.8596, + "step": 343 + }, + { + "epoch": 0.8675914249684742, + "grad_norm": 36.381954193115234, + "learning_rate": 2.7551020408163266e-06, + "loss": 1.8069, + "step": 344 + }, + { + "epoch": 0.8701134930643127, + "grad_norm": 33.54197311401367, + "learning_rate": 2.7040816326530615e-06, + "loss": 1.9138, + "step": 345 + }, + { + "epoch": 0.8726355611601513, + "grad_norm": 27.836380004882812, + "learning_rate": 2.6530612244897964e-06, + "loss": 1.8806, + "step": 346 + }, + { + "epoch": 0.8751576292559899, + "grad_norm": 29.386459350585938, + "learning_rate": 2.602040816326531e-06, + "loss": 1.9654, + "step": 347 + }, + { + "epoch": 0.8776796973518285, + "grad_norm": 25.446636199951172, + "learning_rate": 2.5510204081632657e-06, + "loss": 1.8723, + "step": 348 + }, + { + "epoch": 0.880201765447667, + "grad_norm": 23.149751663208008, + "learning_rate": 2.5e-06, + "loss": 1.7802, + "step": 349 + }, + { + "epoch": 0.8827238335435057, + "grad_norm": 24.944000244140625, + "learning_rate": 2.4489795918367347e-06, + "loss": 1.9328, + "step": 350 + }, + { + "epoch": 0.8852459016393442, + "grad_norm": 24.133886337280273, + "learning_rate": 2.3979591836734696e-06, + "loss": 1.8824, + "step": 351 + }, + { + "epoch": 0.8877679697351829, + "grad_norm": 29.368667602539062, + "learning_rate": 2.3469387755102044e-06, + "loss": 1.8283, + "step": 352 + }, + { + "epoch": 0.8902900378310215, + "grad_norm": 27.814085006713867, + "learning_rate": 2.295918367346939e-06, + "loss": 1.8401, + "step": 353 + }, + { + "epoch": 0.89281210592686, + "grad_norm": 36.647239685058594, + "learning_rate": 2.244897959183674e-06, + "loss": 1.9979, + "step": 354 + }, + { + "epoch": 0.8953341740226987, + "grad_norm": 30.776464462280273, + "learning_rate": 2.1938775510204083e-06, + "loss": 1.971, + "step": 355 + }, + { + "epoch": 0.8978562421185372, + "grad_norm": 21.568681716918945, + "learning_rate": 2.1428571428571427e-06, + "loss": 2.0157, + "step": 356 + }, + { + "epoch": 0.9003783102143758, + "grad_norm": 30.973060607910156, + "learning_rate": 2.0918367346938776e-06, + "loss": 1.9058, + "step": 357 + }, + { + "epoch": 0.9029003783102144, + "grad_norm": 25.841272354125977, + "learning_rate": 2.0408163265306125e-06, + "loss": 1.8946, + "step": 358 + }, + { + "epoch": 0.905422446406053, + "grad_norm": 23.440290451049805, + "learning_rate": 1.989795918367347e-06, + "loss": 1.9487, + "step": 359 + }, + { + "epoch": 0.9079445145018915, + "grad_norm": 25.26444435119629, + "learning_rate": 1.938775510204082e-06, + "loss": 1.7457, + "step": 360 + }, + { + "epoch": 0.9104665825977302, + "grad_norm": 26.824586868286133, + "learning_rate": 1.8877551020408163e-06, + "loss": 1.9297, + "step": 361 + }, + { + "epoch": 0.9129886506935687, + "grad_norm": 30.850406646728516, + "learning_rate": 1.8367346938775512e-06, + "loss": 2.0683, + "step": 362 + }, + { + "epoch": 0.9155107187894073, + "grad_norm": 32.09762191772461, + "learning_rate": 1.7857142857142859e-06, + "loss": 1.7749, + "step": 363 + }, + { + "epoch": 0.9180327868852459, + "grad_norm": 27.876068115234375, + "learning_rate": 1.7346938775510206e-06, + "loss": 1.883, + "step": 364 + }, + { + "epoch": 0.9205548549810845, + "grad_norm": 30.165185928344727, + "learning_rate": 1.6836734693877552e-06, + "loss": 1.9148, + "step": 365 + }, + { + "epoch": 0.9230769230769231, + "grad_norm": 25.353132247924805, + "learning_rate": 1.6326530612244897e-06, + "loss": 1.8784, + "step": 366 + }, + { + "epoch": 0.9255989911727617, + "grad_norm": 38.13972854614258, + "learning_rate": 1.5816326530612248e-06, + "loss": 1.808, + "step": 367 + }, + { + "epoch": 0.9281210592686002, + "grad_norm": 29.678409576416016, + "learning_rate": 1.5306122448979593e-06, + "loss": 1.882, + "step": 368 + }, + { + "epoch": 0.9306431273644389, + "grad_norm": 19.059682846069336, + "learning_rate": 1.479591836734694e-06, + "loss": 1.8964, + "step": 369 + }, + { + "epoch": 0.9331651954602774, + "grad_norm": 26.248071670532227, + "learning_rate": 1.4285714285714286e-06, + "loss": 1.8685, + "step": 370 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5406833335959552.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-370/training_args.bin b/checkpoints/checkpoint-370/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..866f8d7e39db21daf30a11de7c14a3ccb8f2e9aa --- /dev/null +++ b/checkpoints/checkpoint-370/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:591380c2089627b1aa728bfd52abdb50afeaafc7a7f48353dede4e443fc370d0 +size 5969 diff --git a/checkpoints/checkpoint-380/README.md b/checkpoints/checkpoint-380/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b974a935220f493e52a3da346206a652479b4735 --- /dev/null +++ b/checkpoints/checkpoint-380/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.2-1B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-380/adapter_config.json b/checkpoints/checkpoint-380/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a0d31afa2216e2d6d2237fc15eef0c8c03ca674 --- /dev/null +++ b/checkpoints/checkpoint-380/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.2-1B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-380/adapter_model.safetensors b/checkpoints/checkpoint-380/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4c967dc2b96232196119eb36d851ba5498e364ae --- /dev/null +++ b/checkpoints/checkpoint-380/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd36998b9ae40fc41dc829113e17703561d2d2eb520bc9ec8be7ae87c3e28d05 +size 41248 diff --git a/checkpoints/checkpoint-380/chat_template.jinja b/checkpoints/checkpoint-380/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..1bad6a0f648dccdbec523ca79ba90fbcfc806af0 --- /dev/null +++ b/checkpoints/checkpoint-380/chat_template.jinja @@ -0,0 +1,93 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- if strftime_now is defined %} + {%- set date_string = strftime_now("%d %b %Y") %} + {%- else %} + {%- set date_string = "26 Jul 2024" %} + {%- endif %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {{- "<|eot_id|>" }} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-380/optimizer.pt b/checkpoints/checkpoint-380/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..75c4ea07b541f158314f2f1fca38e4c3a3f50ad3 --- /dev/null +++ b/checkpoints/checkpoint-380/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d505fe80997c8341436ac2b513869ce24c6b040e7ef65d7c859c8a18979b6857 +size 38953 diff --git a/checkpoints/checkpoint-380/rng_state.pth b/checkpoints/checkpoint-380/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c9d202f3cabb995444284e316dca48902b67f89a --- /dev/null +++ b/checkpoints/checkpoint-380/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f646d2c44059721e44eb93fd2af0841401f183d6ff9b2e067649f1f58cce0f09 +size 14581 diff --git a/checkpoints/checkpoint-380/scheduler.pt b/checkpoints/checkpoint-380/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..15dc3ebbc8ec02935ea2deee2594f454bb68b3d5 --- /dev/null +++ b/checkpoints/checkpoint-380/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:748551ef496b3d17bf00453efd63985252edf9aff7fd68bb47afcddf11dc4f53 +size 1465 diff --git a/checkpoints/checkpoint-380/special_tokens_map.json b/checkpoints/checkpoint-380/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-380/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-380/tokenizer.json b/checkpoints/checkpoint-380/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-380/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-380/tokenizer_config.json b/checkpoints/checkpoint-380/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-380/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-380/trainer_state.json b/checkpoints/checkpoint-380/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1f397e3192364229e5a63ef6e0722c7c1fc2ebda --- /dev/null +++ b/checkpoints/checkpoint-380/trainer_state.json @@ -0,0 +1,2718 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9583858764186634, + "eval_steps": 100, + "global_step": 380, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 25.350475311279297, + "learning_rate": 0.0, + "loss": 2.5568, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 24.538068771362305, + "learning_rate": 4.000000000000001e-06, + "loss": 2.7748, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 23.780784606933594, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5911, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 24.780380249023438, + "learning_rate": 1.2e-05, + "loss": 2.8427, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 22.699949264526367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.709, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 22.106008529663086, + "learning_rate": 2e-05, + "loss": 2.5854, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 22.497045516967773, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.5427, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 27.103275299072266, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.6599, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 21.081985473632812, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.5145, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 25.964981079101562, + "learning_rate": 1.979591836734694e-05, + "loss": 2.4247, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 25.353195190429688, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.5092, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 18.94191551208496, + "learning_rate": 1.969387755102041e-05, + "loss": 2.4335, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 23.60140037536621, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.544, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 24.298965454101562, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.4987, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.745506286621094, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.4985, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 22.54330062866211, + "learning_rate": 1.948979591836735e-05, + "loss": 2.6892, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 21.46229362487793, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.3998, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 20.54530143737793, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.4244, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 18.8839111328125, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.3911, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.924652099609375, + "learning_rate": 1.928571428571429e-05, + "loss": 2.3588, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.996627807617188, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.3727, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 18.584613800048828, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2974, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 14.309200286865234, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.4843, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.074164390563965, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.4043, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 13.610542297363281, + "learning_rate": 1.903061224489796e-05, + "loss": 2.3762, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 15.666613578796387, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.3249, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 14.475164413452148, + "learning_rate": 1.892857142857143e-05, + "loss": 2.3317, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 16.231687545776367, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.5064, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 16.8968563079834, + "learning_rate": 1.88265306122449e-05, + "loss": 2.3932, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 17.74305534362793, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.3329, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 16.41620445251465, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.3982, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 17.965959548950195, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.4227, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 19.92589569091797, + "learning_rate": 1.862244897959184e-05, + "loss": 2.5255, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 20.62932586669922, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.1816, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 18.360614776611328, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.2827, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 19.199546813964844, + "learning_rate": 1.8469387755102043e-05, + "loss": 2.1498, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 22.727521896362305, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.3811, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 19.80649757385254, + "learning_rate": 1.836734693877551e-05, + "loss": 2.2342, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.24563217163086, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.2287, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 25.384042739868164, + "learning_rate": 1.826530612244898e-05, + "loss": 2.1259, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 23.417089462280273, + "learning_rate": 1.8214285714285715e-05, + "loss": 2.0858, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 27.639497756958008, + "learning_rate": 1.816326530612245e-05, + "loss": 2.2243, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 27.390850067138672, + "learning_rate": 1.8112244897959187e-05, + "loss": 2.1314, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 27.956937789916992, + "learning_rate": 1.806122448979592e-05, + "loss": 2.1755, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 32.09632873535156, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.2365, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 33.84647750854492, + "learning_rate": 1.795918367346939e-05, + "loss": 2.1671, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 32.027130126953125, + "learning_rate": 1.7908163265306123e-05, + "loss": 2.09, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 35.423587799072266, + "learning_rate": 1.785714285714286e-05, + "loss": 2.2479, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 31.041240692138672, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9687, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 28.790103912353516, + "learning_rate": 1.7755102040816327e-05, + "loss": 2.1428, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.089313507080078, + "learning_rate": 1.7704081632653062e-05, + "loss": 2.0673, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 26.493867874145508, + "learning_rate": 1.7653061224489798e-05, + "loss": 2.0814, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 19.993173599243164, + "learning_rate": 1.760204081632653e-05, + "loss": 2.005, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 21.89765167236328, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.2262, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 23.22844123840332, + "learning_rate": 1.7500000000000002e-05, + "loss": 2.0208, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 15.864526748657227, + "learning_rate": 1.7448979591836738e-05, + "loss": 2.0153, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 21.451187133789062, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.1386, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 18.089811325073242, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.9517, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 24.029157638549805, + "learning_rate": 1.729591836734694e-05, + "loss": 1.9719, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 18.722776412963867, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0623, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 20.211933135986328, + "learning_rate": 1.719387755102041e-05, + "loss": 2.0081, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 17.61188507080078, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.8484, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 20.118955612182617, + "learning_rate": 1.7091836734693878e-05, + "loss": 2.0799, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 17.271841049194336, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.9832, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 19.521392822265625, + "learning_rate": 1.698979591836735e-05, + "loss": 1.9129, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 22.660900115966797, + "learning_rate": 1.6938775510204085e-05, + "loss": 2.118, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 17.332427978515625, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.9632, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 22.42765998840332, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.954, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 23.6208553314209, + "learning_rate": 1.678571428571429e-05, + "loss": 1.9917, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 19.78505516052246, + "learning_rate": 1.673469387755102e-05, + "loss": 1.7964, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 19.453041076660156, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.9587, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 24.731407165527344, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.9945, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 20.977611541748047, + "learning_rate": 1.6581632653061225e-05, + "loss": 2.0617, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 22.959585189819336, + "learning_rate": 1.653061224489796e-05, + "loss": 1.98, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 21.952653884887695, + "learning_rate": 1.6479591836734696e-05, + "loss": 2.1094, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 22.320383071899414, + "learning_rate": 1.642857142857143e-05, + "loss": 1.8418, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 24.375411987304688, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.8428, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 19.64323616027832, + "learning_rate": 1.63265306122449e-05, + "loss": 1.9194, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 22.459064483642578, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.6649, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 36.789764404296875, + "learning_rate": 1.6224489795918368e-05, + "loss": 2.0131, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 22.109119415283203, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.9603, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 19.196834564208984, + "learning_rate": 1.612244897959184e-05, + "loss": 2.0538, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 26.870800018310547, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.9168, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 35.190696716308594, + "learning_rate": 1.6020408163265308e-05, + "loss": 2.0149, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 19.963472366333008, + "learning_rate": 1.596938775510204e-05, + "loss": 1.7871, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 20.292407989501953, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.944, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 20.55329132080078, + "learning_rate": 1.586734693877551e-05, + "loss": 2.0175, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 17.27350616455078, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.9308, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 22.471134185791016, + "learning_rate": 1.576530612244898e-05, + "loss": 1.9221, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 25.098316192626953, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.9359, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 25.125213623046875, + "learning_rate": 1.566326530612245e-05, + "loss": 2.0087, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 20.038599014282227, + "learning_rate": 1.5612244897959187e-05, + "loss": 2.0939, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 19.016841888427734, + "learning_rate": 1.556122448979592e-05, + "loss": 2.0183, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 21.97820472717285, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.8239, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 25.578901290893555, + "learning_rate": 1.545918367346939e-05, + "loss": 1.9388, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 23.74614143371582, + "learning_rate": 1.5408163265306123e-05, + "loss": 2.0492, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 22.203304290771484, + "learning_rate": 1.535714285714286e-05, + "loss": 1.941, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 21.39324188232422, + "learning_rate": 1.530612244897959e-05, + "loss": 1.9042, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 18.99315643310547, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.88, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 24.22341537475586, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.8147, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.8966256380081177, + "eval_runtime": 6.9787, + "eval_samples_per_second": 101.022, + "eval_steps_per_second": 50.583, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 18.296152114868164, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.8605, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 26.404766082763672, + "learning_rate": 1.510204081632653e-05, + "loss": 2.1195, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 19.187122344970703, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.9284, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 20.79934310913086, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.725, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 23.833288192749023, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.9215, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 22.301727294921875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.8728, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 23.685596466064453, + "learning_rate": 1.4846938775510204e-05, + "loss": 2.0482, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 18.969186782836914, + "learning_rate": 1.479591836734694e-05, + "loss": 1.8724, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 23.994483947753906, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.9542, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 16.84621238708496, + "learning_rate": 1.469387755102041e-05, + "loss": 1.9703, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 23.411087036132812, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.9836, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 29.55487632751465, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.9124, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 32.28921127319336, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.8566, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 24.007558822631836, + "learning_rate": 1.448979591836735e-05, + "loss": 1.8296, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 26.753524780273438, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.7181, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 22.49270248413086, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.8741, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 28.006656646728516, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.9151, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 17.606775283813477, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.9654, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 29.94802474975586, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.8849, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 28.27743148803711, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.8006, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 19.11652183532715, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.8539, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 24.255807876586914, + "learning_rate": 1.4081632653061225e-05, + "loss": 2.0162, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 22.508352279663086, + "learning_rate": 1.403061224489796e-05, + "loss": 1.858, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 27.028772354125977, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.8175, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 22.697704315185547, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.9789, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 31.604068756103516, + "learning_rate": 1.3877551020408165e-05, + "loss": 2.0039, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 27.71053695678711, + "learning_rate": 1.38265306122449e-05, + "loss": 1.9867, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 17.37586784362793, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.6931, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 20.28536605834961, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.9199, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 29.4377384185791, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.9322, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 17.703046798706055, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.8842, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 30.14008140563965, + "learning_rate": 1.3571428571428574e-05, + "loss": 2.1228, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 29.657262802124023, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.8929, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 18.243854522705078, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.8811, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 33.22247314453125, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.9814, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 26.413856506347656, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.9329, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 20.56089210510254, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.8944, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 19.480737686157227, + "learning_rate": 1.326530612244898e-05, + "loss": 1.9221, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 22.788074493408203, + "learning_rate": 1.3214285714285716e-05, + "loss": 2.0445, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 20.722291946411133, + "learning_rate": 1.316326530612245e-05, + "loss": 1.9998, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 25.190189361572266, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.8076, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 23.203886032104492, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.7821, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 25.32374382019043, + "learning_rate": 1.3010204081632653e-05, + "loss": 2.0356, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 28.798864364624023, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.8202, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 24.93810272216797, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.9237, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 36.78353500366211, + "learning_rate": 1.2857142857142859e-05, + "loss": 2.0019, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 28.510663986206055, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.9268, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 38.19087219238281, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.9366, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 20.796728134155273, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.8731, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 23.036758422851562, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.8835, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 27.058195114135742, + "learning_rate": 1.260204081632653e-05, + "loss": 1.8013, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 25.390460968017578, + "learning_rate": 1.2551020408163267e-05, + "loss": 2.0623, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 27.993654251098633, + "learning_rate": 1.25e-05, + "loss": 1.6895, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 24.15807342529297, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.9799, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 24.369815826416016, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.9687, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 24.572988510131836, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.8607, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 20.491390228271484, + "learning_rate": 1.229591836734694e-05, + "loss": 2.0677, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 25.128101348876953, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.9233, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 18.843276977539062, + "learning_rate": 1.219387755102041e-05, + "loss": 1.781, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 24.99994659423828, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.962, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 20.679218292236328, + "learning_rate": 1.2091836734693878e-05, + "loss": 2.0055, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 26.00550651550293, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.9761, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 33.80900192260742, + "learning_rate": 1.1989795918367348e-05, + "loss": 2.0502, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 25.639009475708008, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.9088, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 17.48627471923828, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.9359, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 23.16074562072754, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.8647, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 25.39946174621582, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.8523, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 25.8050537109375, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.8403, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 20.019033432006836, + "learning_rate": 1.1683673469387755e-05, + "loss": 2.0023, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 26.194847106933594, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.9429, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 21.064212799072266, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.8302, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 21.876129150390625, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.8881, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 33.61103439331055, + "learning_rate": 1.1479591836734697e-05, + "loss": 2.0497, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 27.204744338989258, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.8431, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 21.605751037597656, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.9149, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 30.307472229003906, + "learning_rate": 1.1326530612244899e-05, + "loss": 2.0313, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 23.69244384765625, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.8676, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 25.619901657104492, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.7905, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 28.0296573638916, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.8567, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 36.4359130859375, + "learning_rate": 1.1122448979591838e-05, + "loss": 2.115, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 26.91726303100586, + "learning_rate": 1.1071428571428572e-05, + "loss": 2.0642, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 23.085880279541016, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.9109, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 27.870641708374023, + "learning_rate": 1.096938775510204e-05, + "loss": 1.8999, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 32.0672607421875, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.904, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 28.879365921020508, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.7159, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 27.592771530151367, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.8561, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 27.412763595581055, + "learning_rate": 1.076530612244898e-05, + "loss": 1.9282, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 30.12356185913086, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.8726, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 39.9027214050293, + "learning_rate": 1.066326530612245e-05, + "loss": 1.7551, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 30.483945846557617, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.7643, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 26.00415802001953, + "learning_rate": 1.0561224489795918e-05, + "loss": 2.0552, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 23.03282356262207, + "learning_rate": 1.0510204081632654e-05, + "loss": 2.0052, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 33.653221130371094, + "learning_rate": 1.045918367346939e-05, + "loss": 1.7367, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 39.59351348876953, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.9726, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.77714920043945, + "learning_rate": 1.0357142857142859e-05, + "loss": 2.0906, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 33.194549560546875, + "learning_rate": 1.0306122448979591e-05, + "loss": 2.0742, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 25.10793685913086, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.9204, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 40.048404693603516, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.7775, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 26.085933685302734, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.9459, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 18.375, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.9536, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.8626214265823364, + "eval_runtime": 6.6508, + "eval_samples_per_second": 106.002, + "eval_steps_per_second": 53.076, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 33.858341217041016, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.8191, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 22.895992279052734, + "learning_rate": 1e-05, + "loss": 2.0596, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 30.55072593688965, + "learning_rate": 9.948979591836737e-06, + "loss": 1.8904, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 26.542705535888672, + "learning_rate": 9.89795918367347e-06, + "loss": 1.9683, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 39.81034851074219, + "learning_rate": 9.846938775510205e-06, + "loss": 1.9726, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 22.0065860748291, + "learning_rate": 9.795918367346939e-06, + "loss": 2.0575, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 19.012041091918945, + "learning_rate": 9.744897959183674e-06, + "loss": 1.8431, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 39.699974060058594, + "learning_rate": 9.693877551020408e-06, + "loss": 1.8231, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 21.391319274902344, + "learning_rate": 9.642857142857144e-06, + "loss": 1.7939, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 25.8063907623291, + "learning_rate": 9.591836734693878e-06, + "loss": 2.0366, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 20.598569869995117, + "learning_rate": 9.540816326530612e-06, + "loss": 1.8323, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 29.391401290893555, + "learning_rate": 9.489795918367348e-06, + "loss": 2.0052, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 24.39499855041504, + "learning_rate": 9.438775510204082e-06, + "loss": 1.8461, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 24.16887092590332, + "learning_rate": 9.387755102040818e-06, + "loss": 1.9404, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 24.577871322631836, + "learning_rate": 9.336734693877552e-06, + "loss": 1.9202, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 26.117361068725586, + "learning_rate": 9.285714285714288e-06, + "loss": 1.921, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 22.586837768554688, + "learning_rate": 9.234693877551022e-06, + "loss": 1.9692, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 18.438722610473633, + "learning_rate": 9.183673469387756e-06, + "loss": 1.9496, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 22.94545555114746, + "learning_rate": 9.13265306122449e-06, + "loss": 1.991, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 28.664562225341797, + "learning_rate": 9.081632653061225e-06, + "loss": 1.8352, + "step": 220 + }, + { + "epoch": 0.5573770491803278, + "grad_norm": 25.63576316833496, + "learning_rate": 9.03061224489796e-06, + "loss": 1.9399, + "step": 221 + }, + { + "epoch": 0.5598991172761665, + "grad_norm": 21.650251388549805, + "learning_rate": 8.979591836734695e-06, + "loss": 1.9565, + "step": 222 + }, + { + "epoch": 0.562421185372005, + "grad_norm": 29.605735778808594, + "learning_rate": 8.92857142857143e-06, + "loss": 1.729, + "step": 223 + }, + { + "epoch": 0.5649432534678437, + "grad_norm": 23.98230743408203, + "learning_rate": 8.877551020408163e-06, + "loss": 1.9399, + "step": 224 + }, + { + "epoch": 0.5674653215636822, + "grad_norm": 20.37510108947754, + "learning_rate": 8.826530612244899e-06, + "loss": 1.7322, + "step": 225 + }, + { + "epoch": 0.5699873896595208, + "grad_norm": 25.876188278198242, + "learning_rate": 8.775510204081633e-06, + "loss": 1.9444, + "step": 226 + }, + { + "epoch": 0.5725094577553594, + "grad_norm": 32.07249069213867, + "learning_rate": 8.724489795918369e-06, + "loss": 1.9364, + "step": 227 + }, + { + "epoch": 0.575031525851198, + "grad_norm": 28.014524459838867, + "learning_rate": 8.673469387755103e-06, + "loss": 1.757, + "step": 228 + }, + { + "epoch": 0.5775535939470365, + "grad_norm": 30.82647132873535, + "learning_rate": 8.622448979591837e-06, + "loss": 1.9067, + "step": 229 + }, + { + "epoch": 0.5800756620428752, + "grad_norm": 30.651660919189453, + "learning_rate": 8.571428571428571e-06, + "loss": 1.9906, + "step": 230 + }, + { + "epoch": 0.5825977301387137, + "grad_norm": 25.239904403686523, + "learning_rate": 8.520408163265307e-06, + "loss": 1.8914, + "step": 231 + }, + { + "epoch": 0.5851197982345523, + "grad_norm": 21.33747673034668, + "learning_rate": 8.469387755102042e-06, + "loss": 1.9999, + "step": 232 + }, + { + "epoch": 0.587641866330391, + "grad_norm": 25.255064010620117, + "learning_rate": 8.418367346938776e-06, + "loss": 1.8941, + "step": 233 + }, + { + "epoch": 0.5901639344262295, + "grad_norm": 24.443973541259766, + "learning_rate": 8.36734693877551e-06, + "loss": 1.7679, + "step": 234 + }, + { + "epoch": 0.592686002522068, + "grad_norm": 25.473894119262695, + "learning_rate": 8.316326530612246e-06, + "loss": 1.7876, + "step": 235 + }, + { + "epoch": 0.5952080706179067, + "grad_norm": 26.28467559814453, + "learning_rate": 8.26530612244898e-06, + "loss": 1.6761, + "step": 236 + }, + { + "epoch": 0.5977301387137453, + "grad_norm": 24.488052368164062, + "learning_rate": 8.214285714285714e-06, + "loss": 2.0022, + "step": 237 + }, + { + "epoch": 0.6002522068095839, + "grad_norm": 30.074064254760742, + "learning_rate": 8.16326530612245e-06, + "loss": 1.7747, + "step": 238 + }, + { + "epoch": 0.6027742749054225, + "grad_norm": 23.73440170288086, + "learning_rate": 8.112244897959184e-06, + "loss": 1.8468, + "step": 239 + }, + { + "epoch": 0.605296343001261, + "grad_norm": 22.338869094848633, + "learning_rate": 8.06122448979592e-06, + "loss": 1.8611, + "step": 240 + }, + { + "epoch": 0.6078184110970997, + "grad_norm": 24.844266891479492, + "learning_rate": 8.010204081632654e-06, + "loss": 1.9285, + "step": 241 + }, + { + "epoch": 0.6103404791929382, + "grad_norm": 29.65668487548828, + "learning_rate": 7.959183673469388e-06, + "loss": 1.935, + "step": 242 + }, + { + "epoch": 0.6128625472887768, + "grad_norm": 26.01723289489746, + "learning_rate": 7.908163265306124e-06, + "loss": 1.7587, + "step": 243 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 27.04817771911621, + "learning_rate": 7.857142857142858e-06, + "loss": 1.8878, + "step": 244 + }, + { + "epoch": 0.617906683480454, + "grad_norm": 36.23786163330078, + "learning_rate": 7.806122448979593e-06, + "loss": 1.992, + "step": 245 + }, + { + "epoch": 0.6204287515762925, + "grad_norm": 19.283294677734375, + "learning_rate": 7.755102040816327e-06, + "loss": 1.8066, + "step": 246 + }, + { + "epoch": 0.6229508196721312, + "grad_norm": 24.24143409729004, + "learning_rate": 7.704081632653061e-06, + "loss": 1.8899, + "step": 247 + }, + { + "epoch": 0.6254728877679697, + "grad_norm": 25.59832763671875, + "learning_rate": 7.653061224489796e-06, + "loss": 1.9601, + "step": 248 + }, + { + "epoch": 0.6279949558638083, + "grad_norm": 27.195640563964844, + "learning_rate": 7.602040816326531e-06, + "loss": 1.9561, + "step": 249 + }, + { + "epoch": 0.6305170239596469, + "grad_norm": 27.854570388793945, + "learning_rate": 7.551020408163265e-06, + "loss": 1.8781, + "step": 250 + }, + { + "epoch": 0.6330390920554855, + "grad_norm": 25.715761184692383, + "learning_rate": 7.500000000000001e-06, + "loss": 1.8542, + "step": 251 + }, + { + "epoch": 0.6355611601513241, + "grad_norm": 22.562984466552734, + "learning_rate": 7.448979591836736e-06, + "loss": 1.7681, + "step": 252 + }, + { + "epoch": 0.6380832282471627, + "grad_norm": 20.540348052978516, + "learning_rate": 7.39795918367347e-06, + "loss": 1.9617, + "step": 253 + }, + { + "epoch": 0.6406052963430012, + "grad_norm": 24.610937118530273, + "learning_rate": 7.346938775510205e-06, + "loss": 1.9694, + "step": 254 + }, + { + "epoch": 0.6431273644388399, + "grad_norm": 27.93538475036621, + "learning_rate": 7.295918367346939e-06, + "loss": 1.8858, + "step": 255 + }, + { + "epoch": 0.6456494325346784, + "grad_norm": 31.466445922851562, + "learning_rate": 7.244897959183675e-06, + "loss": 1.8252, + "step": 256 + }, + { + "epoch": 0.648171500630517, + "grad_norm": 26.276226043701172, + "learning_rate": 7.193877551020409e-06, + "loss": 1.8865, + "step": 257 + }, + { + "epoch": 0.6506935687263556, + "grad_norm": 22.52095603942871, + "learning_rate": 7.1428571428571436e-06, + "loss": 1.7649, + "step": 258 + }, + { + "epoch": 0.6532156368221942, + "grad_norm": 20.15144157409668, + "learning_rate": 7.091836734693878e-06, + "loss": 2.0158, + "step": 259 + }, + { + "epoch": 0.6557377049180327, + "grad_norm": 26.405349731445312, + "learning_rate": 7.0408163265306125e-06, + "loss": 1.8932, + "step": 260 + }, + { + "epoch": 0.6582597730138714, + "grad_norm": 32.94384765625, + "learning_rate": 6.989795918367348e-06, + "loss": 1.7795, + "step": 261 + }, + { + "epoch": 0.6607818411097099, + "grad_norm": 23.109092712402344, + "learning_rate": 6.938775510204082e-06, + "loss": 1.8383, + "step": 262 + }, + { + "epoch": 0.6633039092055486, + "grad_norm": 21.75737190246582, + "learning_rate": 6.887755102040817e-06, + "loss": 1.8727, + "step": 263 + }, + { + "epoch": 0.6658259773013872, + "grad_norm": 22.96916389465332, + "learning_rate": 6.836734693877551e-06, + "loss": 1.8544, + "step": 264 + }, + { + "epoch": 0.6683480453972257, + "grad_norm": 25.62445640563965, + "learning_rate": 6.785714285714287e-06, + "loss": 1.7503, + "step": 265 + }, + { + "epoch": 0.6708701134930644, + "grad_norm": 25.430530548095703, + "learning_rate": 6.734693877551021e-06, + "loss": 1.7938, + "step": 266 + }, + { + "epoch": 0.6733921815889029, + "grad_norm": 26.462881088256836, + "learning_rate": 6.683673469387756e-06, + "loss": 1.8284, + "step": 267 + }, + { + "epoch": 0.6759142496847415, + "grad_norm": 31.45004653930664, + "learning_rate": 6.63265306122449e-06, + "loss": 2.0328, + "step": 268 + }, + { + "epoch": 0.6784363177805801, + "grad_norm": 30.525737762451172, + "learning_rate": 6.581632653061225e-06, + "loss": 1.8192, + "step": 269 + }, + { + "epoch": 0.6809583858764187, + "grad_norm": 25.705707550048828, + "learning_rate": 6.530612244897959e-06, + "loss": 1.8533, + "step": 270 + }, + { + "epoch": 0.6834804539722572, + "grad_norm": 39.90187454223633, + "learning_rate": 6.4795918367346946e-06, + "loss": 1.9483, + "step": 271 + }, + { + "epoch": 0.6860025220680959, + "grad_norm": 28.0180721282959, + "learning_rate": 6.4285714285714295e-06, + "loss": 1.8132, + "step": 272 + }, + { + "epoch": 0.6885245901639344, + "grad_norm": 34.821372985839844, + "learning_rate": 6.3775510204081635e-06, + "loss": 1.9599, + "step": 273 + }, + { + "epoch": 0.691046658259773, + "grad_norm": 24.018394470214844, + "learning_rate": 6.326530612244899e-06, + "loss": 1.9248, + "step": 274 + }, + { + "epoch": 0.6935687263556116, + "grad_norm": 24.074344635009766, + "learning_rate": 6.275510204081633e-06, + "loss": 2.0148, + "step": 275 + }, + { + "epoch": 0.6960907944514502, + "grad_norm": 31.1939754486084, + "learning_rate": 6.224489795918368e-06, + "loss": 1.8959, + "step": 276 + }, + { + "epoch": 0.6986128625472888, + "grad_norm": 25.481502532958984, + "learning_rate": 6.173469387755102e-06, + "loss": 1.9832, + "step": 277 + }, + { + "epoch": 0.7011349306431274, + "grad_norm": 29.6664981842041, + "learning_rate": 6.122448979591837e-06, + "loss": 1.9222, + "step": 278 + }, + { + "epoch": 0.7036569987389659, + "grad_norm": 26.30698585510254, + "learning_rate": 6.071428571428571e-06, + "loss": 1.9897, + "step": 279 + }, + { + "epoch": 0.7061790668348046, + "grad_norm": 31.827558517456055, + "learning_rate": 6.020408163265307e-06, + "loss": 1.8615, + "step": 280 + }, + { + "epoch": 0.7087011349306431, + "grad_norm": 24.80223846435547, + "learning_rate": 5.969387755102042e-06, + "loss": 1.9579, + "step": 281 + }, + { + "epoch": 0.7112232030264817, + "grad_norm": 36.134700775146484, + "learning_rate": 5.918367346938776e-06, + "loss": 1.723, + "step": 282 + }, + { + "epoch": 0.7137452711223203, + "grad_norm": 30.388233184814453, + "learning_rate": 5.867346938775511e-06, + "loss": 1.9736, + "step": 283 + }, + { + "epoch": 0.7162673392181589, + "grad_norm": 32.231563568115234, + "learning_rate": 5.816326530612246e-06, + "loss": 1.9228, + "step": 284 + }, + { + "epoch": 0.7187894073139974, + "grad_norm": 38.05869674682617, + "learning_rate": 5.7653061224489805e-06, + "loss": 1.9159, + "step": 285 + }, + { + "epoch": 0.7213114754098361, + "grad_norm": 27.256147384643555, + "learning_rate": 5.7142857142857145e-06, + "loss": 1.8072, + "step": 286 + }, + { + "epoch": 0.7238335435056746, + "grad_norm": 25.67181396484375, + "learning_rate": 5.663265306122449e-06, + "loss": 1.8633, + "step": 287 + }, + { + "epoch": 0.7263556116015133, + "grad_norm": 31.8681697845459, + "learning_rate": 5.6122448979591834e-06, + "loss": 1.9822, + "step": 288 + }, + { + "epoch": 0.7288776796973518, + "grad_norm": 32.85325241088867, + "learning_rate": 5.561224489795919e-06, + "loss": 1.8166, + "step": 289 + }, + { + "epoch": 0.7313997477931904, + "grad_norm": 35.64312744140625, + "learning_rate": 5.510204081632653e-06, + "loss": 1.7166, + "step": 290 + }, + { + "epoch": 0.733921815889029, + "grad_norm": 24.276235580444336, + "learning_rate": 5.459183673469388e-06, + "loss": 1.7593, + "step": 291 + }, + { + "epoch": 0.7364438839848676, + "grad_norm": 29.371950149536133, + "learning_rate": 5.408163265306123e-06, + "loss": 1.8124, + "step": 292 + }, + { + "epoch": 0.7389659520807061, + "grad_norm": 23.76220703125, + "learning_rate": 5.357142857142857e-06, + "loss": 1.7775, + "step": 293 + }, + { + "epoch": 0.7414880201765448, + "grad_norm": 37.103050231933594, + "learning_rate": 5.306122448979593e-06, + "loss": 1.9253, + "step": 294 + }, + { + "epoch": 0.7440100882723834, + "grad_norm": 20.0811767578125, + "learning_rate": 5.255102040816327e-06, + "loss": 1.8711, + "step": 295 + }, + { + "epoch": 0.7465321563682219, + "grad_norm": 35.33123016357422, + "learning_rate": 5.204081632653062e-06, + "loss": 1.8764, + "step": 296 + }, + { + "epoch": 0.7490542244640606, + "grad_norm": 31.880672454833984, + "learning_rate": 5.153061224489796e-06, + "loss": 1.8929, + "step": 297 + }, + { + "epoch": 0.7515762925598991, + "grad_norm": 21.682334899902344, + "learning_rate": 5.1020408163265315e-06, + "loss": 2.0377, + "step": 298 + }, + { + "epoch": 0.7540983606557377, + "grad_norm": 34.68608474731445, + "learning_rate": 5.0510204081632655e-06, + "loss": 1.9341, + "step": 299 + }, + { + "epoch": 0.7566204287515763, + "grad_norm": 25.59632110595703, + "learning_rate": 5e-06, + "loss": 1.8264, + "step": 300 + }, + { + "epoch": 0.7566204287515763, + "eval_loss": 1.8502724170684814, + "eval_runtime": 6.6208, + "eval_samples_per_second": 106.483, + "eval_steps_per_second": 53.317, + "step": 300 + }, + { + "epoch": 0.7591424968474149, + "grad_norm": 33.780616760253906, + "learning_rate": 4.948979591836735e-06, + "loss": 1.8315, + "step": 301 + }, + { + "epoch": 0.7616645649432535, + "grad_norm": 23.005069732666016, + "learning_rate": 4.897959183673469e-06, + "loss": 1.8434, + "step": 302 + }, + { + "epoch": 0.7641866330390921, + "grad_norm": 27.338787078857422, + "learning_rate": 4.846938775510204e-06, + "loss": 1.9102, + "step": 303 + }, + { + "epoch": 0.7667087011349306, + "grad_norm": 26.87493133544922, + "learning_rate": 4.795918367346939e-06, + "loss": 1.8408, + "step": 304 + }, + { + "epoch": 0.7692307692307693, + "grad_norm": 33.59117126464844, + "learning_rate": 4.744897959183674e-06, + "loss": 1.8633, + "step": 305 + }, + { + "epoch": 0.7717528373266078, + "grad_norm": 38.98092269897461, + "learning_rate": 4.693877551020409e-06, + "loss": 1.8187, + "step": 306 + }, + { + "epoch": 0.7742749054224464, + "grad_norm": 28.7203369140625, + "learning_rate": 4.642857142857144e-06, + "loss": 1.8425, + "step": 307 + }, + { + "epoch": 0.776796973518285, + "grad_norm": 30.91414451599121, + "learning_rate": 4.591836734693878e-06, + "loss": 1.8526, + "step": 308 + }, + { + "epoch": 0.7793190416141236, + "grad_norm": 29.04154396057129, + "learning_rate": 4.540816326530613e-06, + "loss": 1.8913, + "step": 309 + }, + { + "epoch": 0.7818411097099621, + "grad_norm": 29.638099670410156, + "learning_rate": 4.489795918367348e-06, + "loss": 1.8736, + "step": 310 + }, + { + "epoch": 0.7843631778058008, + "grad_norm": 21.630523681640625, + "learning_rate": 4.438775510204082e-06, + "loss": 1.7407, + "step": 311 + }, + { + "epoch": 0.7868852459016393, + "grad_norm": 28.39365577697754, + "learning_rate": 4.3877551020408165e-06, + "loss": 1.7503, + "step": 312 + }, + { + "epoch": 0.7894073139974779, + "grad_norm": 30.86245346069336, + "learning_rate": 4.336734693877551e-06, + "loss": 1.885, + "step": 313 + }, + { + "epoch": 0.7919293820933165, + "grad_norm": 20.29497718811035, + "learning_rate": 4.2857142857142855e-06, + "loss": 1.8725, + "step": 314 + }, + { + "epoch": 0.7944514501891551, + "grad_norm": 20.04092025756836, + "learning_rate": 4.234693877551021e-06, + "loss": 1.949, + "step": 315 + }, + { + "epoch": 0.7969735182849937, + "grad_norm": 26.88593101501465, + "learning_rate": 4.183673469387755e-06, + "loss": 1.8807, + "step": 316 + }, + { + "epoch": 0.7994955863808323, + "grad_norm": 28.06767463684082, + "learning_rate": 4.13265306122449e-06, + "loss": 1.9135, + "step": 317 + }, + { + "epoch": 0.8020176544766708, + "grad_norm": 35.69569778442383, + "learning_rate": 4.081632653061225e-06, + "loss": 1.8257, + "step": 318 + }, + { + "epoch": 0.8045397225725095, + "grad_norm": 29.278770446777344, + "learning_rate": 4.03061224489796e-06, + "loss": 1.8459, + "step": 319 + }, + { + "epoch": 0.807061790668348, + "grad_norm": 28.043975830078125, + "learning_rate": 3.979591836734694e-06, + "loss": 1.8821, + "step": 320 + }, + { + "epoch": 0.8095838587641866, + "grad_norm": 24.060317993164062, + "learning_rate": 3.928571428571429e-06, + "loss": 1.853, + "step": 321 + }, + { + "epoch": 0.8121059268600253, + "grad_norm": 23.929243087768555, + "learning_rate": 3.877551020408164e-06, + "loss": 1.9249, + "step": 322 + }, + { + "epoch": 0.8146279949558638, + "grad_norm": 37.94782257080078, + "learning_rate": 3.826530612244898e-06, + "loss": 1.9666, + "step": 323 + }, + { + "epoch": 0.8171500630517023, + "grad_norm": 26.99836540222168, + "learning_rate": 3.7755102040816327e-06, + "loss": 1.8464, + "step": 324 + }, + { + "epoch": 0.819672131147541, + "grad_norm": 26.87177085876465, + "learning_rate": 3.724489795918368e-06, + "loss": 1.9178, + "step": 325 + }, + { + "epoch": 0.8221941992433796, + "grad_norm": 36.669212341308594, + "learning_rate": 3.6734693877551024e-06, + "loss": 1.8469, + "step": 326 + }, + { + "epoch": 0.8247162673392182, + "grad_norm": 26.681806564331055, + "learning_rate": 3.6224489795918373e-06, + "loss": 1.9433, + "step": 327 + }, + { + "epoch": 0.8272383354350568, + "grad_norm": 27.62560272216797, + "learning_rate": 3.5714285714285718e-06, + "loss": 1.8022, + "step": 328 + }, + { + "epoch": 0.8297604035308953, + "grad_norm": 31.047653198242188, + "learning_rate": 3.5204081632653062e-06, + "loss": 1.9952, + "step": 329 + }, + { + "epoch": 0.832282471626734, + "grad_norm": 22.605276107788086, + "learning_rate": 3.469387755102041e-06, + "loss": 1.8992, + "step": 330 + }, + { + "epoch": 0.8348045397225725, + "grad_norm": 27.210397720336914, + "learning_rate": 3.4183673469387756e-06, + "loss": 2.0905, + "step": 331 + }, + { + "epoch": 0.8373266078184111, + "grad_norm": 19.93979835510254, + "learning_rate": 3.3673469387755105e-06, + "loss": 1.8109, + "step": 332 + }, + { + "epoch": 0.8398486759142497, + "grad_norm": 28.999895095825195, + "learning_rate": 3.316326530612245e-06, + "loss": 1.8978, + "step": 333 + }, + { + "epoch": 0.8423707440100883, + "grad_norm": 28.620771408081055, + "learning_rate": 3.2653061224489794e-06, + "loss": 2.0414, + "step": 334 + }, + { + "epoch": 0.8448928121059268, + "grad_norm": 28.657215118408203, + "learning_rate": 3.2142857142857147e-06, + "loss": 1.8445, + "step": 335 + }, + { + "epoch": 0.8474148802017655, + "grad_norm": 33.65087127685547, + "learning_rate": 3.1632653061224496e-06, + "loss": 1.8559, + "step": 336 + }, + { + "epoch": 0.849936948297604, + "grad_norm": 28.926881790161133, + "learning_rate": 3.112244897959184e-06, + "loss": 1.9608, + "step": 337 + }, + { + "epoch": 0.8524590163934426, + "grad_norm": 24.56757926940918, + "learning_rate": 3.0612244897959185e-06, + "loss": 1.8948, + "step": 338 + }, + { + "epoch": 0.8549810844892812, + "grad_norm": 26.79437828063965, + "learning_rate": 3.0102040816326534e-06, + "loss": 1.7803, + "step": 339 + }, + { + "epoch": 0.8575031525851198, + "grad_norm": 22.516748428344727, + "learning_rate": 2.959183673469388e-06, + "loss": 1.9039, + "step": 340 + }, + { + "epoch": 0.8600252206809584, + "grad_norm": 28.779983520507812, + "learning_rate": 2.908163265306123e-06, + "loss": 1.9083, + "step": 341 + }, + { + "epoch": 0.862547288776797, + "grad_norm": 23.90164566040039, + "learning_rate": 2.8571428571428573e-06, + "loss": 1.7973, + "step": 342 + }, + { + "epoch": 0.8650693568726355, + "grad_norm": 25.098011016845703, + "learning_rate": 2.8061224489795917e-06, + "loss": 1.8596, + "step": 343 + }, + { + "epoch": 0.8675914249684742, + "grad_norm": 36.381954193115234, + "learning_rate": 2.7551020408163266e-06, + "loss": 1.8069, + "step": 344 + }, + { + "epoch": 0.8701134930643127, + "grad_norm": 33.54197311401367, + "learning_rate": 2.7040816326530615e-06, + "loss": 1.9138, + "step": 345 + }, + { + "epoch": 0.8726355611601513, + "grad_norm": 27.836380004882812, + "learning_rate": 2.6530612244897964e-06, + "loss": 1.8806, + "step": 346 + }, + { + "epoch": 0.8751576292559899, + "grad_norm": 29.386459350585938, + "learning_rate": 2.602040816326531e-06, + "loss": 1.9654, + "step": 347 + }, + { + "epoch": 0.8776796973518285, + "grad_norm": 25.446636199951172, + "learning_rate": 2.5510204081632657e-06, + "loss": 1.8723, + "step": 348 + }, + { + "epoch": 0.880201765447667, + "grad_norm": 23.149751663208008, + "learning_rate": 2.5e-06, + "loss": 1.7802, + "step": 349 + }, + { + "epoch": 0.8827238335435057, + "grad_norm": 24.944000244140625, + "learning_rate": 2.4489795918367347e-06, + "loss": 1.9328, + "step": 350 + }, + { + "epoch": 0.8852459016393442, + "grad_norm": 24.133886337280273, + "learning_rate": 2.3979591836734696e-06, + "loss": 1.8824, + "step": 351 + }, + { + "epoch": 0.8877679697351829, + "grad_norm": 29.368667602539062, + "learning_rate": 2.3469387755102044e-06, + "loss": 1.8283, + "step": 352 + }, + { + "epoch": 0.8902900378310215, + "grad_norm": 27.814085006713867, + "learning_rate": 2.295918367346939e-06, + "loss": 1.8401, + "step": 353 + }, + { + "epoch": 0.89281210592686, + "grad_norm": 36.647239685058594, + "learning_rate": 2.244897959183674e-06, + "loss": 1.9979, + "step": 354 + }, + { + "epoch": 0.8953341740226987, + "grad_norm": 30.776464462280273, + "learning_rate": 2.1938775510204083e-06, + "loss": 1.971, + "step": 355 + }, + { + "epoch": 0.8978562421185372, + "grad_norm": 21.568681716918945, + "learning_rate": 2.1428571428571427e-06, + "loss": 2.0157, + "step": 356 + }, + { + "epoch": 0.9003783102143758, + "grad_norm": 30.973060607910156, + "learning_rate": 2.0918367346938776e-06, + "loss": 1.9058, + "step": 357 + }, + { + "epoch": 0.9029003783102144, + "grad_norm": 25.841272354125977, + "learning_rate": 2.0408163265306125e-06, + "loss": 1.8946, + "step": 358 + }, + { + "epoch": 0.905422446406053, + "grad_norm": 23.440290451049805, + "learning_rate": 1.989795918367347e-06, + "loss": 1.9487, + "step": 359 + }, + { + "epoch": 0.9079445145018915, + "grad_norm": 25.26444435119629, + "learning_rate": 1.938775510204082e-06, + "loss": 1.7457, + "step": 360 + }, + { + "epoch": 0.9104665825977302, + "grad_norm": 26.824586868286133, + "learning_rate": 1.8877551020408163e-06, + "loss": 1.9297, + "step": 361 + }, + { + "epoch": 0.9129886506935687, + "grad_norm": 30.850406646728516, + "learning_rate": 1.8367346938775512e-06, + "loss": 2.0683, + "step": 362 + }, + { + "epoch": 0.9155107187894073, + "grad_norm": 32.09762191772461, + "learning_rate": 1.7857142857142859e-06, + "loss": 1.7749, + "step": 363 + }, + { + "epoch": 0.9180327868852459, + "grad_norm": 27.876068115234375, + "learning_rate": 1.7346938775510206e-06, + "loss": 1.883, + "step": 364 + }, + { + "epoch": 0.9205548549810845, + "grad_norm": 30.165185928344727, + "learning_rate": 1.6836734693877552e-06, + "loss": 1.9148, + "step": 365 + }, + { + "epoch": 0.9230769230769231, + "grad_norm": 25.353132247924805, + "learning_rate": 1.6326530612244897e-06, + "loss": 1.8784, + "step": 366 + }, + { + "epoch": 0.9255989911727617, + "grad_norm": 38.13972854614258, + "learning_rate": 1.5816326530612248e-06, + "loss": 1.808, + "step": 367 + }, + { + "epoch": 0.9281210592686002, + "grad_norm": 29.678409576416016, + "learning_rate": 1.5306122448979593e-06, + "loss": 1.882, + "step": 368 + }, + { + "epoch": 0.9306431273644389, + "grad_norm": 19.059682846069336, + "learning_rate": 1.479591836734694e-06, + "loss": 1.8964, + "step": 369 + }, + { + "epoch": 0.9331651954602774, + "grad_norm": 26.248071670532227, + "learning_rate": 1.4285714285714286e-06, + "loss": 1.8685, + "step": 370 + }, + { + "epoch": 0.935687263556116, + "grad_norm": 28.667518615722656, + "learning_rate": 1.3775510204081633e-06, + "loss": 1.947, + "step": 371 + }, + { + "epoch": 0.9382093316519546, + "grad_norm": 27.218080520629883, + "learning_rate": 1.3265306122448982e-06, + "loss": 1.9719, + "step": 372 + }, + { + "epoch": 0.9407313997477932, + "grad_norm": 36.85118865966797, + "learning_rate": 1.2755102040816329e-06, + "loss": 1.8174, + "step": 373 + }, + { + "epoch": 0.9432534678436317, + "grad_norm": 24.54947280883789, + "learning_rate": 1.2244897959183673e-06, + "loss": 1.9564, + "step": 374 + }, + { + "epoch": 0.9457755359394704, + "grad_norm": 23.265106201171875, + "learning_rate": 1.1734693877551022e-06, + "loss": 1.9667, + "step": 375 + }, + { + "epoch": 0.9482976040353089, + "grad_norm": 25.103755950927734, + "learning_rate": 1.122448979591837e-06, + "loss": 1.8938, + "step": 376 + }, + { + "epoch": 0.9508196721311475, + "grad_norm": 28.593093872070312, + "learning_rate": 1.0714285714285714e-06, + "loss": 2.1103, + "step": 377 + }, + { + "epoch": 0.9533417402269861, + "grad_norm": 24.21011734008789, + "learning_rate": 1.0204081632653063e-06, + "loss": 1.695, + "step": 378 + }, + { + "epoch": 0.9558638083228247, + "grad_norm": 31.19601821899414, + "learning_rate": 9.69387755102041e-07, + "loss": 1.9795, + "step": 379 + }, + { + "epoch": 0.9583858764186634, + "grad_norm": 31.64060401916504, + "learning_rate": 9.183673469387756e-07, + "loss": 1.8726, + "step": 380 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5552725043625984.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-380/training_args.bin b/checkpoints/checkpoint-380/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..866f8d7e39db21daf30a11de7c14a3ccb8f2e9aa --- /dev/null +++ b/checkpoints/checkpoint-380/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:591380c2089627b1aa728bfd52abdb50afeaafc7a7f48353dede4e443fc370d0 +size 5969 diff --git a/checkpoints/checkpoint-390/README.md b/checkpoints/checkpoint-390/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b974a935220f493e52a3da346206a652479b4735 --- /dev/null +++ b/checkpoints/checkpoint-390/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.2-1B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-390/adapter_config.json b/checkpoints/checkpoint-390/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a0d31afa2216e2d6d2237fc15eef0c8c03ca674 --- /dev/null +++ b/checkpoints/checkpoint-390/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.2-1B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-390/adapter_model.safetensors b/checkpoints/checkpoint-390/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ad9031706d55e2c62e8310625486c6b8607b3041 --- /dev/null +++ b/checkpoints/checkpoint-390/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a99b25d35577376beeed8de054feb81d8fb794faadbe8548b82148b2c2eec6e +size 41248 diff --git a/checkpoints/checkpoint-390/chat_template.jinja b/checkpoints/checkpoint-390/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..1bad6a0f648dccdbec523ca79ba90fbcfc806af0 --- /dev/null +++ b/checkpoints/checkpoint-390/chat_template.jinja @@ -0,0 +1,93 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- if strftime_now is defined %} + {%- set date_string = strftime_now("%d %b %Y") %} + {%- else %} + {%- set date_string = "26 Jul 2024" %} + {%- endif %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {{- "<|eot_id|>" }} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-390/optimizer.pt b/checkpoints/checkpoint-390/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..fa65b0a766356850b853b86326e4ecc721ce6329 --- /dev/null +++ b/checkpoints/checkpoint-390/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:394505aa4d69f7d79f776e02b61edf6b8deee77b9546dbc24d4630fe359b7875 +size 38953 diff --git a/checkpoints/checkpoint-390/rng_state.pth b/checkpoints/checkpoint-390/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c9d202f3cabb995444284e316dca48902b67f89a --- /dev/null +++ b/checkpoints/checkpoint-390/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f646d2c44059721e44eb93fd2af0841401f183d6ff9b2e067649f1f58cce0f09 +size 14581 diff --git a/checkpoints/checkpoint-390/scheduler.pt b/checkpoints/checkpoint-390/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..21fb5bcb5b7b8c618a92ce8cc57485217a07ee0b --- /dev/null +++ b/checkpoints/checkpoint-390/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4503455eed959bc7133b6bc9a764e0c234ec4b1f2b0938e0a9631235cd9dbd61 +size 1465 diff --git a/checkpoints/checkpoint-390/special_tokens_map.json b/checkpoints/checkpoint-390/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-390/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-390/tokenizer.json b/checkpoints/checkpoint-390/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-390/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-390/tokenizer_config.json b/checkpoints/checkpoint-390/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-390/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-390/trainer_state.json b/checkpoints/checkpoint-390/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..225720ae95996fb94d06cd285d9f6d7001dfb5d2 --- /dev/null +++ b/checkpoints/checkpoint-390/trainer_state.json @@ -0,0 +1,2788 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9836065573770492, + "eval_steps": 100, + "global_step": 390, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 25.350475311279297, + "learning_rate": 0.0, + "loss": 2.5568, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 24.538068771362305, + "learning_rate": 4.000000000000001e-06, + "loss": 2.7748, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 23.780784606933594, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5911, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 24.780380249023438, + "learning_rate": 1.2e-05, + "loss": 2.8427, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 22.699949264526367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.709, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 22.106008529663086, + "learning_rate": 2e-05, + "loss": 2.5854, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 22.497045516967773, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.5427, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 27.103275299072266, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.6599, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 21.081985473632812, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.5145, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 25.964981079101562, + "learning_rate": 1.979591836734694e-05, + "loss": 2.4247, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 25.353195190429688, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.5092, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 18.94191551208496, + "learning_rate": 1.969387755102041e-05, + "loss": 2.4335, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 23.60140037536621, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.544, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 24.298965454101562, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.4987, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.745506286621094, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.4985, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 22.54330062866211, + "learning_rate": 1.948979591836735e-05, + "loss": 2.6892, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 21.46229362487793, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.3998, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 20.54530143737793, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.4244, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 18.8839111328125, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.3911, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.924652099609375, + "learning_rate": 1.928571428571429e-05, + "loss": 2.3588, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.996627807617188, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.3727, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 18.584613800048828, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2974, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 14.309200286865234, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.4843, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.074164390563965, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.4043, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 13.610542297363281, + "learning_rate": 1.903061224489796e-05, + "loss": 2.3762, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 15.666613578796387, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.3249, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 14.475164413452148, + "learning_rate": 1.892857142857143e-05, + "loss": 2.3317, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 16.231687545776367, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.5064, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 16.8968563079834, + "learning_rate": 1.88265306122449e-05, + "loss": 2.3932, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 17.74305534362793, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.3329, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 16.41620445251465, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.3982, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 17.965959548950195, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.4227, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 19.92589569091797, + "learning_rate": 1.862244897959184e-05, + "loss": 2.5255, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 20.62932586669922, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.1816, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 18.360614776611328, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.2827, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 19.199546813964844, + "learning_rate": 1.8469387755102043e-05, + "loss": 2.1498, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 22.727521896362305, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.3811, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 19.80649757385254, + "learning_rate": 1.836734693877551e-05, + "loss": 2.2342, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.24563217163086, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.2287, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 25.384042739868164, + "learning_rate": 1.826530612244898e-05, + "loss": 2.1259, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 23.417089462280273, + "learning_rate": 1.8214285714285715e-05, + "loss": 2.0858, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 27.639497756958008, + "learning_rate": 1.816326530612245e-05, + "loss": 2.2243, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 27.390850067138672, + "learning_rate": 1.8112244897959187e-05, + "loss": 2.1314, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 27.956937789916992, + "learning_rate": 1.806122448979592e-05, + "loss": 2.1755, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 32.09632873535156, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.2365, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 33.84647750854492, + "learning_rate": 1.795918367346939e-05, + "loss": 2.1671, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 32.027130126953125, + "learning_rate": 1.7908163265306123e-05, + "loss": 2.09, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 35.423587799072266, + "learning_rate": 1.785714285714286e-05, + "loss": 2.2479, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 31.041240692138672, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9687, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 28.790103912353516, + "learning_rate": 1.7755102040816327e-05, + "loss": 2.1428, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.089313507080078, + "learning_rate": 1.7704081632653062e-05, + "loss": 2.0673, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 26.493867874145508, + "learning_rate": 1.7653061224489798e-05, + "loss": 2.0814, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 19.993173599243164, + "learning_rate": 1.760204081632653e-05, + "loss": 2.005, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 21.89765167236328, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.2262, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 23.22844123840332, + "learning_rate": 1.7500000000000002e-05, + "loss": 2.0208, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 15.864526748657227, + "learning_rate": 1.7448979591836738e-05, + "loss": 2.0153, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 21.451187133789062, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.1386, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 18.089811325073242, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.9517, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 24.029157638549805, + "learning_rate": 1.729591836734694e-05, + "loss": 1.9719, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 18.722776412963867, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0623, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 20.211933135986328, + "learning_rate": 1.719387755102041e-05, + "loss": 2.0081, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 17.61188507080078, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.8484, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 20.118955612182617, + "learning_rate": 1.7091836734693878e-05, + "loss": 2.0799, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 17.271841049194336, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.9832, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 19.521392822265625, + "learning_rate": 1.698979591836735e-05, + "loss": 1.9129, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 22.660900115966797, + "learning_rate": 1.6938775510204085e-05, + "loss": 2.118, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 17.332427978515625, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.9632, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 22.42765998840332, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.954, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 23.6208553314209, + "learning_rate": 1.678571428571429e-05, + "loss": 1.9917, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 19.78505516052246, + "learning_rate": 1.673469387755102e-05, + "loss": 1.7964, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 19.453041076660156, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.9587, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 24.731407165527344, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.9945, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 20.977611541748047, + "learning_rate": 1.6581632653061225e-05, + "loss": 2.0617, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 22.959585189819336, + "learning_rate": 1.653061224489796e-05, + "loss": 1.98, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 21.952653884887695, + "learning_rate": 1.6479591836734696e-05, + "loss": 2.1094, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 22.320383071899414, + "learning_rate": 1.642857142857143e-05, + "loss": 1.8418, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 24.375411987304688, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.8428, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 19.64323616027832, + "learning_rate": 1.63265306122449e-05, + "loss": 1.9194, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 22.459064483642578, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.6649, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 36.789764404296875, + "learning_rate": 1.6224489795918368e-05, + "loss": 2.0131, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 22.109119415283203, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.9603, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 19.196834564208984, + "learning_rate": 1.612244897959184e-05, + "loss": 2.0538, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 26.870800018310547, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.9168, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 35.190696716308594, + "learning_rate": 1.6020408163265308e-05, + "loss": 2.0149, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 19.963472366333008, + "learning_rate": 1.596938775510204e-05, + "loss": 1.7871, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 20.292407989501953, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.944, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 20.55329132080078, + "learning_rate": 1.586734693877551e-05, + "loss": 2.0175, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 17.27350616455078, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.9308, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 22.471134185791016, + "learning_rate": 1.576530612244898e-05, + "loss": 1.9221, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 25.098316192626953, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.9359, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 25.125213623046875, + "learning_rate": 1.566326530612245e-05, + "loss": 2.0087, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 20.038599014282227, + "learning_rate": 1.5612244897959187e-05, + "loss": 2.0939, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 19.016841888427734, + "learning_rate": 1.556122448979592e-05, + "loss": 2.0183, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 21.97820472717285, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.8239, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 25.578901290893555, + "learning_rate": 1.545918367346939e-05, + "loss": 1.9388, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 23.74614143371582, + "learning_rate": 1.5408163265306123e-05, + "loss": 2.0492, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 22.203304290771484, + "learning_rate": 1.535714285714286e-05, + "loss": 1.941, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 21.39324188232422, + "learning_rate": 1.530612244897959e-05, + "loss": 1.9042, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 18.99315643310547, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.88, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 24.22341537475586, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.8147, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.8966256380081177, + "eval_runtime": 6.9787, + "eval_samples_per_second": 101.022, + "eval_steps_per_second": 50.583, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 18.296152114868164, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.8605, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 26.404766082763672, + "learning_rate": 1.510204081632653e-05, + "loss": 2.1195, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 19.187122344970703, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.9284, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 20.79934310913086, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.725, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 23.833288192749023, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.9215, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 22.301727294921875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.8728, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 23.685596466064453, + "learning_rate": 1.4846938775510204e-05, + "loss": 2.0482, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 18.969186782836914, + "learning_rate": 1.479591836734694e-05, + "loss": 1.8724, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 23.994483947753906, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.9542, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 16.84621238708496, + "learning_rate": 1.469387755102041e-05, + "loss": 1.9703, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 23.411087036132812, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.9836, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 29.55487632751465, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.9124, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 32.28921127319336, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.8566, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 24.007558822631836, + "learning_rate": 1.448979591836735e-05, + "loss": 1.8296, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 26.753524780273438, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.7181, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 22.49270248413086, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.8741, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 28.006656646728516, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.9151, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 17.606775283813477, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.9654, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 29.94802474975586, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.8849, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 28.27743148803711, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.8006, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 19.11652183532715, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.8539, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 24.255807876586914, + "learning_rate": 1.4081632653061225e-05, + "loss": 2.0162, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 22.508352279663086, + "learning_rate": 1.403061224489796e-05, + "loss": 1.858, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 27.028772354125977, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.8175, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 22.697704315185547, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.9789, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 31.604068756103516, + "learning_rate": 1.3877551020408165e-05, + "loss": 2.0039, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 27.71053695678711, + "learning_rate": 1.38265306122449e-05, + "loss": 1.9867, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 17.37586784362793, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.6931, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 20.28536605834961, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.9199, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 29.4377384185791, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.9322, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 17.703046798706055, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.8842, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 30.14008140563965, + "learning_rate": 1.3571428571428574e-05, + "loss": 2.1228, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 29.657262802124023, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.8929, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 18.243854522705078, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.8811, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 33.22247314453125, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.9814, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 26.413856506347656, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.9329, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 20.56089210510254, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.8944, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 19.480737686157227, + "learning_rate": 1.326530612244898e-05, + "loss": 1.9221, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 22.788074493408203, + "learning_rate": 1.3214285714285716e-05, + "loss": 2.0445, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 20.722291946411133, + "learning_rate": 1.316326530612245e-05, + "loss": 1.9998, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 25.190189361572266, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.8076, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 23.203886032104492, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.7821, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 25.32374382019043, + "learning_rate": 1.3010204081632653e-05, + "loss": 2.0356, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 28.798864364624023, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.8202, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 24.93810272216797, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.9237, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 36.78353500366211, + "learning_rate": 1.2857142857142859e-05, + "loss": 2.0019, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 28.510663986206055, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.9268, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 38.19087219238281, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.9366, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 20.796728134155273, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.8731, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 23.036758422851562, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.8835, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 27.058195114135742, + "learning_rate": 1.260204081632653e-05, + "loss": 1.8013, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 25.390460968017578, + "learning_rate": 1.2551020408163267e-05, + "loss": 2.0623, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 27.993654251098633, + "learning_rate": 1.25e-05, + "loss": 1.6895, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 24.15807342529297, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.9799, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 24.369815826416016, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.9687, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 24.572988510131836, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.8607, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 20.491390228271484, + "learning_rate": 1.229591836734694e-05, + "loss": 2.0677, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 25.128101348876953, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.9233, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 18.843276977539062, + "learning_rate": 1.219387755102041e-05, + "loss": 1.781, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 24.99994659423828, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.962, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 20.679218292236328, + "learning_rate": 1.2091836734693878e-05, + "loss": 2.0055, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 26.00550651550293, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.9761, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 33.80900192260742, + "learning_rate": 1.1989795918367348e-05, + "loss": 2.0502, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 25.639009475708008, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.9088, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 17.48627471923828, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.9359, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 23.16074562072754, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.8647, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 25.39946174621582, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.8523, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 25.8050537109375, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.8403, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 20.019033432006836, + "learning_rate": 1.1683673469387755e-05, + "loss": 2.0023, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 26.194847106933594, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.9429, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 21.064212799072266, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.8302, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 21.876129150390625, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.8881, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 33.61103439331055, + "learning_rate": 1.1479591836734697e-05, + "loss": 2.0497, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 27.204744338989258, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.8431, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 21.605751037597656, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.9149, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 30.307472229003906, + "learning_rate": 1.1326530612244899e-05, + "loss": 2.0313, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 23.69244384765625, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.8676, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 25.619901657104492, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.7905, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 28.0296573638916, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.8567, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 36.4359130859375, + "learning_rate": 1.1122448979591838e-05, + "loss": 2.115, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 26.91726303100586, + "learning_rate": 1.1071428571428572e-05, + "loss": 2.0642, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 23.085880279541016, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.9109, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 27.870641708374023, + "learning_rate": 1.096938775510204e-05, + "loss": 1.8999, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 32.0672607421875, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.904, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 28.879365921020508, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.7159, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 27.592771530151367, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.8561, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 27.412763595581055, + "learning_rate": 1.076530612244898e-05, + "loss": 1.9282, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 30.12356185913086, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.8726, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 39.9027214050293, + "learning_rate": 1.066326530612245e-05, + "loss": 1.7551, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 30.483945846557617, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.7643, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 26.00415802001953, + "learning_rate": 1.0561224489795918e-05, + "loss": 2.0552, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 23.03282356262207, + "learning_rate": 1.0510204081632654e-05, + "loss": 2.0052, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 33.653221130371094, + "learning_rate": 1.045918367346939e-05, + "loss": 1.7367, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 39.59351348876953, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.9726, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.77714920043945, + "learning_rate": 1.0357142857142859e-05, + "loss": 2.0906, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 33.194549560546875, + "learning_rate": 1.0306122448979591e-05, + "loss": 2.0742, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 25.10793685913086, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.9204, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 40.048404693603516, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.7775, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 26.085933685302734, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.9459, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 18.375, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.9536, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.8626214265823364, + "eval_runtime": 6.6508, + "eval_samples_per_second": 106.002, + "eval_steps_per_second": 53.076, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 33.858341217041016, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.8191, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 22.895992279052734, + "learning_rate": 1e-05, + "loss": 2.0596, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 30.55072593688965, + "learning_rate": 9.948979591836737e-06, + "loss": 1.8904, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 26.542705535888672, + "learning_rate": 9.89795918367347e-06, + "loss": 1.9683, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 39.81034851074219, + "learning_rate": 9.846938775510205e-06, + "loss": 1.9726, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 22.0065860748291, + "learning_rate": 9.795918367346939e-06, + "loss": 2.0575, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 19.012041091918945, + "learning_rate": 9.744897959183674e-06, + "loss": 1.8431, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 39.699974060058594, + "learning_rate": 9.693877551020408e-06, + "loss": 1.8231, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 21.391319274902344, + "learning_rate": 9.642857142857144e-06, + "loss": 1.7939, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 25.8063907623291, + "learning_rate": 9.591836734693878e-06, + "loss": 2.0366, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 20.598569869995117, + "learning_rate": 9.540816326530612e-06, + "loss": 1.8323, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 29.391401290893555, + "learning_rate": 9.489795918367348e-06, + "loss": 2.0052, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 24.39499855041504, + "learning_rate": 9.438775510204082e-06, + "loss": 1.8461, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 24.16887092590332, + "learning_rate": 9.387755102040818e-06, + "loss": 1.9404, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 24.577871322631836, + "learning_rate": 9.336734693877552e-06, + "loss": 1.9202, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 26.117361068725586, + "learning_rate": 9.285714285714288e-06, + "loss": 1.921, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 22.586837768554688, + "learning_rate": 9.234693877551022e-06, + "loss": 1.9692, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 18.438722610473633, + "learning_rate": 9.183673469387756e-06, + "loss": 1.9496, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 22.94545555114746, + "learning_rate": 9.13265306122449e-06, + "loss": 1.991, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 28.664562225341797, + "learning_rate": 9.081632653061225e-06, + "loss": 1.8352, + "step": 220 + }, + { + "epoch": 0.5573770491803278, + "grad_norm": 25.63576316833496, + "learning_rate": 9.03061224489796e-06, + "loss": 1.9399, + "step": 221 + }, + { + "epoch": 0.5598991172761665, + "grad_norm": 21.650251388549805, + "learning_rate": 8.979591836734695e-06, + "loss": 1.9565, + "step": 222 + }, + { + "epoch": 0.562421185372005, + "grad_norm": 29.605735778808594, + "learning_rate": 8.92857142857143e-06, + "loss": 1.729, + "step": 223 + }, + { + "epoch": 0.5649432534678437, + "grad_norm": 23.98230743408203, + "learning_rate": 8.877551020408163e-06, + "loss": 1.9399, + "step": 224 + }, + { + "epoch": 0.5674653215636822, + "grad_norm": 20.37510108947754, + "learning_rate": 8.826530612244899e-06, + "loss": 1.7322, + "step": 225 + }, + { + "epoch": 0.5699873896595208, + "grad_norm": 25.876188278198242, + "learning_rate": 8.775510204081633e-06, + "loss": 1.9444, + "step": 226 + }, + { + "epoch": 0.5725094577553594, + "grad_norm": 32.07249069213867, + "learning_rate": 8.724489795918369e-06, + "loss": 1.9364, + "step": 227 + }, + { + "epoch": 0.575031525851198, + "grad_norm": 28.014524459838867, + "learning_rate": 8.673469387755103e-06, + "loss": 1.757, + "step": 228 + }, + { + "epoch": 0.5775535939470365, + "grad_norm": 30.82647132873535, + "learning_rate": 8.622448979591837e-06, + "loss": 1.9067, + "step": 229 + }, + { + "epoch": 0.5800756620428752, + "grad_norm": 30.651660919189453, + "learning_rate": 8.571428571428571e-06, + "loss": 1.9906, + "step": 230 + }, + { + "epoch": 0.5825977301387137, + "grad_norm": 25.239904403686523, + "learning_rate": 8.520408163265307e-06, + "loss": 1.8914, + "step": 231 + }, + { + "epoch": 0.5851197982345523, + "grad_norm": 21.33747673034668, + "learning_rate": 8.469387755102042e-06, + "loss": 1.9999, + "step": 232 + }, + { + "epoch": 0.587641866330391, + "grad_norm": 25.255064010620117, + "learning_rate": 8.418367346938776e-06, + "loss": 1.8941, + "step": 233 + }, + { + "epoch": 0.5901639344262295, + "grad_norm": 24.443973541259766, + "learning_rate": 8.36734693877551e-06, + "loss": 1.7679, + "step": 234 + }, + { + "epoch": 0.592686002522068, + "grad_norm": 25.473894119262695, + "learning_rate": 8.316326530612246e-06, + "loss": 1.7876, + "step": 235 + }, + { + "epoch": 0.5952080706179067, + "grad_norm": 26.28467559814453, + "learning_rate": 8.26530612244898e-06, + "loss": 1.6761, + "step": 236 + }, + { + "epoch": 0.5977301387137453, + "grad_norm": 24.488052368164062, + "learning_rate": 8.214285714285714e-06, + "loss": 2.0022, + "step": 237 + }, + { + "epoch": 0.6002522068095839, + "grad_norm": 30.074064254760742, + "learning_rate": 8.16326530612245e-06, + "loss": 1.7747, + "step": 238 + }, + { + "epoch": 0.6027742749054225, + "grad_norm": 23.73440170288086, + "learning_rate": 8.112244897959184e-06, + "loss": 1.8468, + "step": 239 + }, + { + "epoch": 0.605296343001261, + "grad_norm": 22.338869094848633, + "learning_rate": 8.06122448979592e-06, + "loss": 1.8611, + "step": 240 + }, + { + "epoch": 0.6078184110970997, + "grad_norm": 24.844266891479492, + "learning_rate": 8.010204081632654e-06, + "loss": 1.9285, + "step": 241 + }, + { + "epoch": 0.6103404791929382, + "grad_norm": 29.65668487548828, + "learning_rate": 7.959183673469388e-06, + "loss": 1.935, + "step": 242 + }, + { + "epoch": 0.6128625472887768, + "grad_norm": 26.01723289489746, + "learning_rate": 7.908163265306124e-06, + "loss": 1.7587, + "step": 243 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 27.04817771911621, + "learning_rate": 7.857142857142858e-06, + "loss": 1.8878, + "step": 244 + }, + { + "epoch": 0.617906683480454, + "grad_norm": 36.23786163330078, + "learning_rate": 7.806122448979593e-06, + "loss": 1.992, + "step": 245 + }, + { + "epoch": 0.6204287515762925, + "grad_norm": 19.283294677734375, + "learning_rate": 7.755102040816327e-06, + "loss": 1.8066, + "step": 246 + }, + { + "epoch": 0.6229508196721312, + "grad_norm": 24.24143409729004, + "learning_rate": 7.704081632653061e-06, + "loss": 1.8899, + "step": 247 + }, + { + "epoch": 0.6254728877679697, + "grad_norm": 25.59832763671875, + "learning_rate": 7.653061224489796e-06, + "loss": 1.9601, + "step": 248 + }, + { + "epoch": 0.6279949558638083, + "grad_norm": 27.195640563964844, + "learning_rate": 7.602040816326531e-06, + "loss": 1.9561, + "step": 249 + }, + { + "epoch": 0.6305170239596469, + "grad_norm": 27.854570388793945, + "learning_rate": 7.551020408163265e-06, + "loss": 1.8781, + "step": 250 + }, + { + "epoch": 0.6330390920554855, + "grad_norm": 25.715761184692383, + "learning_rate": 7.500000000000001e-06, + "loss": 1.8542, + "step": 251 + }, + { + "epoch": 0.6355611601513241, + "grad_norm": 22.562984466552734, + "learning_rate": 7.448979591836736e-06, + "loss": 1.7681, + "step": 252 + }, + { + "epoch": 0.6380832282471627, + "grad_norm": 20.540348052978516, + "learning_rate": 7.39795918367347e-06, + "loss": 1.9617, + "step": 253 + }, + { + "epoch": 0.6406052963430012, + "grad_norm": 24.610937118530273, + "learning_rate": 7.346938775510205e-06, + "loss": 1.9694, + "step": 254 + }, + { + "epoch": 0.6431273644388399, + "grad_norm": 27.93538475036621, + "learning_rate": 7.295918367346939e-06, + "loss": 1.8858, + "step": 255 + }, + { + "epoch": 0.6456494325346784, + "grad_norm": 31.466445922851562, + "learning_rate": 7.244897959183675e-06, + "loss": 1.8252, + "step": 256 + }, + { + "epoch": 0.648171500630517, + "grad_norm": 26.276226043701172, + "learning_rate": 7.193877551020409e-06, + "loss": 1.8865, + "step": 257 + }, + { + "epoch": 0.6506935687263556, + "grad_norm": 22.52095603942871, + "learning_rate": 7.1428571428571436e-06, + "loss": 1.7649, + "step": 258 + }, + { + "epoch": 0.6532156368221942, + "grad_norm": 20.15144157409668, + "learning_rate": 7.091836734693878e-06, + "loss": 2.0158, + "step": 259 + }, + { + "epoch": 0.6557377049180327, + "grad_norm": 26.405349731445312, + "learning_rate": 7.0408163265306125e-06, + "loss": 1.8932, + "step": 260 + }, + { + "epoch": 0.6582597730138714, + "grad_norm": 32.94384765625, + "learning_rate": 6.989795918367348e-06, + "loss": 1.7795, + "step": 261 + }, + { + "epoch": 0.6607818411097099, + "grad_norm": 23.109092712402344, + "learning_rate": 6.938775510204082e-06, + "loss": 1.8383, + "step": 262 + }, + { + "epoch": 0.6633039092055486, + "grad_norm": 21.75737190246582, + "learning_rate": 6.887755102040817e-06, + "loss": 1.8727, + "step": 263 + }, + { + "epoch": 0.6658259773013872, + "grad_norm": 22.96916389465332, + "learning_rate": 6.836734693877551e-06, + "loss": 1.8544, + "step": 264 + }, + { + "epoch": 0.6683480453972257, + "grad_norm": 25.62445640563965, + "learning_rate": 6.785714285714287e-06, + "loss": 1.7503, + "step": 265 + }, + { + "epoch": 0.6708701134930644, + "grad_norm": 25.430530548095703, + "learning_rate": 6.734693877551021e-06, + "loss": 1.7938, + "step": 266 + }, + { + "epoch": 0.6733921815889029, + "grad_norm": 26.462881088256836, + "learning_rate": 6.683673469387756e-06, + "loss": 1.8284, + "step": 267 + }, + { + "epoch": 0.6759142496847415, + "grad_norm": 31.45004653930664, + "learning_rate": 6.63265306122449e-06, + "loss": 2.0328, + "step": 268 + }, + { + "epoch": 0.6784363177805801, + "grad_norm": 30.525737762451172, + "learning_rate": 6.581632653061225e-06, + "loss": 1.8192, + "step": 269 + }, + { + "epoch": 0.6809583858764187, + "grad_norm": 25.705707550048828, + "learning_rate": 6.530612244897959e-06, + "loss": 1.8533, + "step": 270 + }, + { + "epoch": 0.6834804539722572, + "grad_norm": 39.90187454223633, + "learning_rate": 6.4795918367346946e-06, + "loss": 1.9483, + "step": 271 + }, + { + "epoch": 0.6860025220680959, + "grad_norm": 28.0180721282959, + "learning_rate": 6.4285714285714295e-06, + "loss": 1.8132, + "step": 272 + }, + { + "epoch": 0.6885245901639344, + "grad_norm": 34.821372985839844, + "learning_rate": 6.3775510204081635e-06, + "loss": 1.9599, + "step": 273 + }, + { + "epoch": 0.691046658259773, + "grad_norm": 24.018394470214844, + "learning_rate": 6.326530612244899e-06, + "loss": 1.9248, + "step": 274 + }, + { + "epoch": 0.6935687263556116, + "grad_norm": 24.074344635009766, + "learning_rate": 6.275510204081633e-06, + "loss": 2.0148, + "step": 275 + }, + { + "epoch": 0.6960907944514502, + "grad_norm": 31.1939754486084, + "learning_rate": 6.224489795918368e-06, + "loss": 1.8959, + "step": 276 + }, + { + "epoch": 0.6986128625472888, + "grad_norm": 25.481502532958984, + "learning_rate": 6.173469387755102e-06, + "loss": 1.9832, + "step": 277 + }, + { + "epoch": 0.7011349306431274, + "grad_norm": 29.6664981842041, + "learning_rate": 6.122448979591837e-06, + "loss": 1.9222, + "step": 278 + }, + { + "epoch": 0.7036569987389659, + "grad_norm": 26.30698585510254, + "learning_rate": 6.071428571428571e-06, + "loss": 1.9897, + "step": 279 + }, + { + "epoch": 0.7061790668348046, + "grad_norm": 31.827558517456055, + "learning_rate": 6.020408163265307e-06, + "loss": 1.8615, + "step": 280 + }, + { + "epoch": 0.7087011349306431, + "grad_norm": 24.80223846435547, + "learning_rate": 5.969387755102042e-06, + "loss": 1.9579, + "step": 281 + }, + { + "epoch": 0.7112232030264817, + "grad_norm": 36.134700775146484, + "learning_rate": 5.918367346938776e-06, + "loss": 1.723, + "step": 282 + }, + { + "epoch": 0.7137452711223203, + "grad_norm": 30.388233184814453, + "learning_rate": 5.867346938775511e-06, + "loss": 1.9736, + "step": 283 + }, + { + "epoch": 0.7162673392181589, + "grad_norm": 32.231563568115234, + "learning_rate": 5.816326530612246e-06, + "loss": 1.9228, + "step": 284 + }, + { + "epoch": 0.7187894073139974, + "grad_norm": 38.05869674682617, + "learning_rate": 5.7653061224489805e-06, + "loss": 1.9159, + "step": 285 + }, + { + "epoch": 0.7213114754098361, + "grad_norm": 27.256147384643555, + "learning_rate": 5.7142857142857145e-06, + "loss": 1.8072, + "step": 286 + }, + { + "epoch": 0.7238335435056746, + "grad_norm": 25.67181396484375, + "learning_rate": 5.663265306122449e-06, + "loss": 1.8633, + "step": 287 + }, + { + "epoch": 0.7263556116015133, + "grad_norm": 31.8681697845459, + "learning_rate": 5.6122448979591834e-06, + "loss": 1.9822, + "step": 288 + }, + { + "epoch": 0.7288776796973518, + "grad_norm": 32.85325241088867, + "learning_rate": 5.561224489795919e-06, + "loss": 1.8166, + "step": 289 + }, + { + "epoch": 0.7313997477931904, + "grad_norm": 35.64312744140625, + "learning_rate": 5.510204081632653e-06, + "loss": 1.7166, + "step": 290 + }, + { + "epoch": 0.733921815889029, + "grad_norm": 24.276235580444336, + "learning_rate": 5.459183673469388e-06, + "loss": 1.7593, + "step": 291 + }, + { + "epoch": 0.7364438839848676, + "grad_norm": 29.371950149536133, + "learning_rate": 5.408163265306123e-06, + "loss": 1.8124, + "step": 292 + }, + { + "epoch": 0.7389659520807061, + "grad_norm": 23.76220703125, + "learning_rate": 5.357142857142857e-06, + "loss": 1.7775, + "step": 293 + }, + { + "epoch": 0.7414880201765448, + "grad_norm": 37.103050231933594, + "learning_rate": 5.306122448979593e-06, + "loss": 1.9253, + "step": 294 + }, + { + "epoch": 0.7440100882723834, + "grad_norm": 20.0811767578125, + "learning_rate": 5.255102040816327e-06, + "loss": 1.8711, + "step": 295 + }, + { + "epoch": 0.7465321563682219, + "grad_norm": 35.33123016357422, + "learning_rate": 5.204081632653062e-06, + "loss": 1.8764, + "step": 296 + }, + { + "epoch": 0.7490542244640606, + "grad_norm": 31.880672454833984, + "learning_rate": 5.153061224489796e-06, + "loss": 1.8929, + "step": 297 + }, + { + "epoch": 0.7515762925598991, + "grad_norm": 21.682334899902344, + "learning_rate": 5.1020408163265315e-06, + "loss": 2.0377, + "step": 298 + }, + { + "epoch": 0.7540983606557377, + "grad_norm": 34.68608474731445, + "learning_rate": 5.0510204081632655e-06, + "loss": 1.9341, + "step": 299 + }, + { + "epoch": 0.7566204287515763, + "grad_norm": 25.59632110595703, + "learning_rate": 5e-06, + "loss": 1.8264, + "step": 300 + }, + { + "epoch": 0.7566204287515763, + "eval_loss": 1.8502724170684814, + "eval_runtime": 6.6208, + "eval_samples_per_second": 106.483, + "eval_steps_per_second": 53.317, + "step": 300 + }, + { + "epoch": 0.7591424968474149, + "grad_norm": 33.780616760253906, + "learning_rate": 4.948979591836735e-06, + "loss": 1.8315, + "step": 301 + }, + { + "epoch": 0.7616645649432535, + "grad_norm": 23.005069732666016, + "learning_rate": 4.897959183673469e-06, + "loss": 1.8434, + "step": 302 + }, + { + "epoch": 0.7641866330390921, + "grad_norm": 27.338787078857422, + "learning_rate": 4.846938775510204e-06, + "loss": 1.9102, + "step": 303 + }, + { + "epoch": 0.7667087011349306, + "grad_norm": 26.87493133544922, + "learning_rate": 4.795918367346939e-06, + "loss": 1.8408, + "step": 304 + }, + { + "epoch": 0.7692307692307693, + "grad_norm": 33.59117126464844, + "learning_rate": 4.744897959183674e-06, + "loss": 1.8633, + "step": 305 + }, + { + "epoch": 0.7717528373266078, + "grad_norm": 38.98092269897461, + "learning_rate": 4.693877551020409e-06, + "loss": 1.8187, + "step": 306 + }, + { + "epoch": 0.7742749054224464, + "grad_norm": 28.7203369140625, + "learning_rate": 4.642857142857144e-06, + "loss": 1.8425, + "step": 307 + }, + { + "epoch": 0.776796973518285, + "grad_norm": 30.91414451599121, + "learning_rate": 4.591836734693878e-06, + "loss": 1.8526, + "step": 308 + }, + { + "epoch": 0.7793190416141236, + "grad_norm": 29.04154396057129, + "learning_rate": 4.540816326530613e-06, + "loss": 1.8913, + "step": 309 + }, + { + "epoch": 0.7818411097099621, + "grad_norm": 29.638099670410156, + "learning_rate": 4.489795918367348e-06, + "loss": 1.8736, + "step": 310 + }, + { + "epoch": 0.7843631778058008, + "grad_norm": 21.630523681640625, + "learning_rate": 4.438775510204082e-06, + "loss": 1.7407, + "step": 311 + }, + { + "epoch": 0.7868852459016393, + "grad_norm": 28.39365577697754, + "learning_rate": 4.3877551020408165e-06, + "loss": 1.7503, + "step": 312 + }, + { + "epoch": 0.7894073139974779, + "grad_norm": 30.86245346069336, + "learning_rate": 4.336734693877551e-06, + "loss": 1.885, + "step": 313 + }, + { + "epoch": 0.7919293820933165, + "grad_norm": 20.29497718811035, + "learning_rate": 4.2857142857142855e-06, + "loss": 1.8725, + "step": 314 + }, + { + "epoch": 0.7944514501891551, + "grad_norm": 20.04092025756836, + "learning_rate": 4.234693877551021e-06, + "loss": 1.949, + "step": 315 + }, + { + "epoch": 0.7969735182849937, + "grad_norm": 26.88593101501465, + "learning_rate": 4.183673469387755e-06, + "loss": 1.8807, + "step": 316 + }, + { + "epoch": 0.7994955863808323, + "grad_norm": 28.06767463684082, + "learning_rate": 4.13265306122449e-06, + "loss": 1.9135, + "step": 317 + }, + { + "epoch": 0.8020176544766708, + "grad_norm": 35.69569778442383, + "learning_rate": 4.081632653061225e-06, + "loss": 1.8257, + "step": 318 + }, + { + "epoch": 0.8045397225725095, + "grad_norm": 29.278770446777344, + "learning_rate": 4.03061224489796e-06, + "loss": 1.8459, + "step": 319 + }, + { + "epoch": 0.807061790668348, + "grad_norm": 28.043975830078125, + "learning_rate": 3.979591836734694e-06, + "loss": 1.8821, + "step": 320 + }, + { + "epoch": 0.8095838587641866, + "grad_norm": 24.060317993164062, + "learning_rate": 3.928571428571429e-06, + "loss": 1.853, + "step": 321 + }, + { + "epoch": 0.8121059268600253, + "grad_norm": 23.929243087768555, + "learning_rate": 3.877551020408164e-06, + "loss": 1.9249, + "step": 322 + }, + { + "epoch": 0.8146279949558638, + "grad_norm": 37.94782257080078, + "learning_rate": 3.826530612244898e-06, + "loss": 1.9666, + "step": 323 + }, + { + "epoch": 0.8171500630517023, + "grad_norm": 26.99836540222168, + "learning_rate": 3.7755102040816327e-06, + "loss": 1.8464, + "step": 324 + }, + { + "epoch": 0.819672131147541, + "grad_norm": 26.87177085876465, + "learning_rate": 3.724489795918368e-06, + "loss": 1.9178, + "step": 325 + }, + { + "epoch": 0.8221941992433796, + "grad_norm": 36.669212341308594, + "learning_rate": 3.6734693877551024e-06, + "loss": 1.8469, + "step": 326 + }, + { + "epoch": 0.8247162673392182, + "grad_norm": 26.681806564331055, + "learning_rate": 3.6224489795918373e-06, + "loss": 1.9433, + "step": 327 + }, + { + "epoch": 0.8272383354350568, + "grad_norm": 27.62560272216797, + "learning_rate": 3.5714285714285718e-06, + "loss": 1.8022, + "step": 328 + }, + { + "epoch": 0.8297604035308953, + "grad_norm": 31.047653198242188, + "learning_rate": 3.5204081632653062e-06, + "loss": 1.9952, + "step": 329 + }, + { + "epoch": 0.832282471626734, + "grad_norm": 22.605276107788086, + "learning_rate": 3.469387755102041e-06, + "loss": 1.8992, + "step": 330 + }, + { + "epoch": 0.8348045397225725, + "grad_norm": 27.210397720336914, + "learning_rate": 3.4183673469387756e-06, + "loss": 2.0905, + "step": 331 + }, + { + "epoch": 0.8373266078184111, + "grad_norm": 19.93979835510254, + "learning_rate": 3.3673469387755105e-06, + "loss": 1.8109, + "step": 332 + }, + { + "epoch": 0.8398486759142497, + "grad_norm": 28.999895095825195, + "learning_rate": 3.316326530612245e-06, + "loss": 1.8978, + "step": 333 + }, + { + "epoch": 0.8423707440100883, + "grad_norm": 28.620771408081055, + "learning_rate": 3.2653061224489794e-06, + "loss": 2.0414, + "step": 334 + }, + { + "epoch": 0.8448928121059268, + "grad_norm": 28.657215118408203, + "learning_rate": 3.2142857142857147e-06, + "loss": 1.8445, + "step": 335 + }, + { + "epoch": 0.8474148802017655, + "grad_norm": 33.65087127685547, + "learning_rate": 3.1632653061224496e-06, + "loss": 1.8559, + "step": 336 + }, + { + "epoch": 0.849936948297604, + "grad_norm": 28.926881790161133, + "learning_rate": 3.112244897959184e-06, + "loss": 1.9608, + "step": 337 + }, + { + "epoch": 0.8524590163934426, + "grad_norm": 24.56757926940918, + "learning_rate": 3.0612244897959185e-06, + "loss": 1.8948, + "step": 338 + }, + { + "epoch": 0.8549810844892812, + "grad_norm": 26.79437828063965, + "learning_rate": 3.0102040816326534e-06, + "loss": 1.7803, + "step": 339 + }, + { + "epoch": 0.8575031525851198, + "grad_norm": 22.516748428344727, + "learning_rate": 2.959183673469388e-06, + "loss": 1.9039, + "step": 340 + }, + { + "epoch": 0.8600252206809584, + "grad_norm": 28.779983520507812, + "learning_rate": 2.908163265306123e-06, + "loss": 1.9083, + "step": 341 + }, + { + "epoch": 0.862547288776797, + "grad_norm": 23.90164566040039, + "learning_rate": 2.8571428571428573e-06, + "loss": 1.7973, + "step": 342 + }, + { + "epoch": 0.8650693568726355, + "grad_norm": 25.098011016845703, + "learning_rate": 2.8061224489795917e-06, + "loss": 1.8596, + "step": 343 + }, + { + "epoch": 0.8675914249684742, + "grad_norm": 36.381954193115234, + "learning_rate": 2.7551020408163266e-06, + "loss": 1.8069, + "step": 344 + }, + { + "epoch": 0.8701134930643127, + "grad_norm": 33.54197311401367, + "learning_rate": 2.7040816326530615e-06, + "loss": 1.9138, + "step": 345 + }, + { + "epoch": 0.8726355611601513, + "grad_norm": 27.836380004882812, + "learning_rate": 2.6530612244897964e-06, + "loss": 1.8806, + "step": 346 + }, + { + "epoch": 0.8751576292559899, + "grad_norm": 29.386459350585938, + "learning_rate": 2.602040816326531e-06, + "loss": 1.9654, + "step": 347 + }, + { + "epoch": 0.8776796973518285, + "grad_norm": 25.446636199951172, + "learning_rate": 2.5510204081632657e-06, + "loss": 1.8723, + "step": 348 + }, + { + "epoch": 0.880201765447667, + "grad_norm": 23.149751663208008, + "learning_rate": 2.5e-06, + "loss": 1.7802, + "step": 349 + }, + { + "epoch": 0.8827238335435057, + "grad_norm": 24.944000244140625, + "learning_rate": 2.4489795918367347e-06, + "loss": 1.9328, + "step": 350 + }, + { + "epoch": 0.8852459016393442, + "grad_norm": 24.133886337280273, + "learning_rate": 2.3979591836734696e-06, + "loss": 1.8824, + "step": 351 + }, + { + "epoch": 0.8877679697351829, + "grad_norm": 29.368667602539062, + "learning_rate": 2.3469387755102044e-06, + "loss": 1.8283, + "step": 352 + }, + { + "epoch": 0.8902900378310215, + "grad_norm": 27.814085006713867, + "learning_rate": 2.295918367346939e-06, + "loss": 1.8401, + "step": 353 + }, + { + "epoch": 0.89281210592686, + "grad_norm": 36.647239685058594, + "learning_rate": 2.244897959183674e-06, + "loss": 1.9979, + "step": 354 + }, + { + "epoch": 0.8953341740226987, + "grad_norm": 30.776464462280273, + "learning_rate": 2.1938775510204083e-06, + "loss": 1.971, + "step": 355 + }, + { + "epoch": 0.8978562421185372, + "grad_norm": 21.568681716918945, + "learning_rate": 2.1428571428571427e-06, + "loss": 2.0157, + "step": 356 + }, + { + "epoch": 0.9003783102143758, + "grad_norm": 30.973060607910156, + "learning_rate": 2.0918367346938776e-06, + "loss": 1.9058, + "step": 357 + }, + { + "epoch": 0.9029003783102144, + "grad_norm": 25.841272354125977, + "learning_rate": 2.0408163265306125e-06, + "loss": 1.8946, + "step": 358 + }, + { + "epoch": 0.905422446406053, + "grad_norm": 23.440290451049805, + "learning_rate": 1.989795918367347e-06, + "loss": 1.9487, + "step": 359 + }, + { + "epoch": 0.9079445145018915, + "grad_norm": 25.26444435119629, + "learning_rate": 1.938775510204082e-06, + "loss": 1.7457, + "step": 360 + }, + { + "epoch": 0.9104665825977302, + "grad_norm": 26.824586868286133, + "learning_rate": 1.8877551020408163e-06, + "loss": 1.9297, + "step": 361 + }, + { + "epoch": 0.9129886506935687, + "grad_norm": 30.850406646728516, + "learning_rate": 1.8367346938775512e-06, + "loss": 2.0683, + "step": 362 + }, + { + "epoch": 0.9155107187894073, + "grad_norm": 32.09762191772461, + "learning_rate": 1.7857142857142859e-06, + "loss": 1.7749, + "step": 363 + }, + { + "epoch": 0.9180327868852459, + "grad_norm": 27.876068115234375, + "learning_rate": 1.7346938775510206e-06, + "loss": 1.883, + "step": 364 + }, + { + "epoch": 0.9205548549810845, + "grad_norm": 30.165185928344727, + "learning_rate": 1.6836734693877552e-06, + "loss": 1.9148, + "step": 365 + }, + { + "epoch": 0.9230769230769231, + "grad_norm": 25.353132247924805, + "learning_rate": 1.6326530612244897e-06, + "loss": 1.8784, + "step": 366 + }, + { + "epoch": 0.9255989911727617, + "grad_norm": 38.13972854614258, + "learning_rate": 1.5816326530612248e-06, + "loss": 1.808, + "step": 367 + }, + { + "epoch": 0.9281210592686002, + "grad_norm": 29.678409576416016, + "learning_rate": 1.5306122448979593e-06, + "loss": 1.882, + "step": 368 + }, + { + "epoch": 0.9306431273644389, + "grad_norm": 19.059682846069336, + "learning_rate": 1.479591836734694e-06, + "loss": 1.8964, + "step": 369 + }, + { + "epoch": 0.9331651954602774, + "grad_norm": 26.248071670532227, + "learning_rate": 1.4285714285714286e-06, + "loss": 1.8685, + "step": 370 + }, + { + "epoch": 0.935687263556116, + "grad_norm": 28.667518615722656, + "learning_rate": 1.3775510204081633e-06, + "loss": 1.947, + "step": 371 + }, + { + "epoch": 0.9382093316519546, + "grad_norm": 27.218080520629883, + "learning_rate": 1.3265306122448982e-06, + "loss": 1.9719, + "step": 372 + }, + { + "epoch": 0.9407313997477932, + "grad_norm": 36.85118865966797, + "learning_rate": 1.2755102040816329e-06, + "loss": 1.8174, + "step": 373 + }, + { + "epoch": 0.9432534678436317, + "grad_norm": 24.54947280883789, + "learning_rate": 1.2244897959183673e-06, + "loss": 1.9564, + "step": 374 + }, + { + "epoch": 0.9457755359394704, + "grad_norm": 23.265106201171875, + "learning_rate": 1.1734693877551022e-06, + "loss": 1.9667, + "step": 375 + }, + { + "epoch": 0.9482976040353089, + "grad_norm": 25.103755950927734, + "learning_rate": 1.122448979591837e-06, + "loss": 1.8938, + "step": 376 + }, + { + "epoch": 0.9508196721311475, + "grad_norm": 28.593093872070312, + "learning_rate": 1.0714285714285714e-06, + "loss": 2.1103, + "step": 377 + }, + { + "epoch": 0.9533417402269861, + "grad_norm": 24.21011734008789, + "learning_rate": 1.0204081632653063e-06, + "loss": 1.695, + "step": 378 + }, + { + "epoch": 0.9558638083228247, + "grad_norm": 31.19601821899414, + "learning_rate": 9.69387755102041e-07, + "loss": 1.9795, + "step": 379 + }, + { + "epoch": 0.9583858764186634, + "grad_norm": 31.64060401916504, + "learning_rate": 9.183673469387756e-07, + "loss": 1.8726, + "step": 380 + }, + { + "epoch": 0.9609079445145019, + "grad_norm": 26.76803207397461, + "learning_rate": 8.673469387755103e-07, + "loss": 1.9005, + "step": 381 + }, + { + "epoch": 0.9634300126103404, + "grad_norm": 29.592342376708984, + "learning_rate": 8.163265306122449e-07, + "loss": 1.8033, + "step": 382 + }, + { + "epoch": 0.9659520807061791, + "grad_norm": 29.41642951965332, + "learning_rate": 7.653061224489796e-07, + "loss": 2.0594, + "step": 383 + }, + { + "epoch": 0.9684741488020177, + "grad_norm": 17.477853775024414, + "learning_rate": 7.142857142857143e-07, + "loss": 1.8088, + "step": 384 + }, + { + "epoch": 0.9709962168978562, + "grad_norm": 31.39056396484375, + "learning_rate": 6.632653061224491e-07, + "loss": 1.8812, + "step": 385 + }, + { + "epoch": 0.9735182849936949, + "grad_norm": 34.46897888183594, + "learning_rate": 6.122448979591837e-07, + "loss": 1.9868, + "step": 386 + }, + { + "epoch": 0.9760403530895334, + "grad_norm": 29.819665908813477, + "learning_rate": 5.612244897959184e-07, + "loss": 1.8238, + "step": 387 + }, + { + "epoch": 0.978562421185372, + "grad_norm": 29.0477237701416, + "learning_rate": 5.102040816326531e-07, + "loss": 1.8195, + "step": 388 + }, + { + "epoch": 0.9810844892812106, + "grad_norm": 26.244121551513672, + "learning_rate": 4.591836734693878e-07, + "loss": 1.9176, + "step": 389 + }, + { + "epoch": 0.9836065573770492, + "grad_norm": 29.151588439941406, + "learning_rate": 4.0816326530612243e-07, + "loss": 1.9222, + "step": 390 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5703089377886208.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-390/training_args.bin b/checkpoints/checkpoint-390/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..866f8d7e39db21daf30a11de7c14a3ccb8f2e9aa --- /dev/null +++ b/checkpoints/checkpoint-390/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:591380c2089627b1aa728bfd52abdb50afeaafc7a7f48353dede4e443fc370d0 +size 5969 diff --git a/checkpoints/checkpoint-397/README.md b/checkpoints/checkpoint-397/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b974a935220f493e52a3da346206a652479b4735 --- /dev/null +++ b/checkpoints/checkpoint-397/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.2-1B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-397/adapter_config.json b/checkpoints/checkpoint-397/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a0d31afa2216e2d6d2237fc15eef0c8c03ca674 --- /dev/null +++ b/checkpoints/checkpoint-397/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.2-1B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-397/adapter_model.safetensors b/checkpoints/checkpoint-397/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2cb5e63be461f902888c6376e683ec07e8b60ccd --- /dev/null +++ b/checkpoints/checkpoint-397/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:775110dea61026c476be64640336bc82938ab26937f74e0e54a10b81be094570 +size 41248 diff --git a/checkpoints/checkpoint-397/chat_template.jinja b/checkpoints/checkpoint-397/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..1bad6a0f648dccdbec523ca79ba90fbcfc806af0 --- /dev/null +++ b/checkpoints/checkpoint-397/chat_template.jinja @@ -0,0 +1,93 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- if strftime_now is defined %} + {%- set date_string = strftime_now("%d %b %Y") %} + {%- else %} + {%- set date_string = "26 Jul 2024" %} + {%- endif %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {{- "<|eot_id|>" }} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-397/optimizer.pt b/checkpoints/checkpoint-397/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c00d1eb9857faf7638f884cf8c1878fc004027a7 --- /dev/null +++ b/checkpoints/checkpoint-397/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f44d53884abc5fc3590057b38720985da91e5437580c404c5593fbf28c539228 +size 38953 diff --git a/checkpoints/checkpoint-397/rng_state.pth b/checkpoints/checkpoint-397/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c9d202f3cabb995444284e316dca48902b67f89a --- /dev/null +++ b/checkpoints/checkpoint-397/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f646d2c44059721e44eb93fd2af0841401f183d6ff9b2e067649f1f58cce0f09 +size 14581 diff --git a/checkpoints/checkpoint-397/scheduler.pt b/checkpoints/checkpoint-397/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..102e657d31650051a83d1d7ea3b668972446c16c --- /dev/null +++ b/checkpoints/checkpoint-397/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4500448392b99ca07eb31aa81080ff74976927f1ea697ed84792e9303ad6078 +size 1465 diff --git a/checkpoints/checkpoint-397/special_tokens_map.json b/checkpoints/checkpoint-397/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-397/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-397/tokenizer.json b/checkpoints/checkpoint-397/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-397/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-397/tokenizer_config.json b/checkpoints/checkpoint-397/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-397/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-397/trainer_state.json b/checkpoints/checkpoint-397/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..5bf148f4bc28cec501277590977859f1dfe91248 --- /dev/null +++ b/checkpoints/checkpoint-397/trainer_state.json @@ -0,0 +1,2837 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 100, + "global_step": 397, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 25.350475311279297, + "learning_rate": 0.0, + "loss": 2.5568, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 24.538068771362305, + "learning_rate": 4.000000000000001e-06, + "loss": 2.7748, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 23.780784606933594, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5911, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 24.780380249023438, + "learning_rate": 1.2e-05, + "loss": 2.8427, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 22.699949264526367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.709, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 22.106008529663086, + "learning_rate": 2e-05, + "loss": 2.5854, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 22.497045516967773, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.5427, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 27.103275299072266, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.6599, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 21.081985473632812, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.5145, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 25.964981079101562, + "learning_rate": 1.979591836734694e-05, + "loss": 2.4247, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 25.353195190429688, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.5092, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 18.94191551208496, + "learning_rate": 1.969387755102041e-05, + "loss": 2.4335, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 23.60140037536621, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.544, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 24.298965454101562, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.4987, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.745506286621094, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.4985, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 22.54330062866211, + "learning_rate": 1.948979591836735e-05, + "loss": 2.6892, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 21.46229362487793, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.3998, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 20.54530143737793, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.4244, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 18.8839111328125, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.3911, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.924652099609375, + "learning_rate": 1.928571428571429e-05, + "loss": 2.3588, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.996627807617188, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.3727, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 18.584613800048828, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2974, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 14.309200286865234, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.4843, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.074164390563965, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.4043, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 13.610542297363281, + "learning_rate": 1.903061224489796e-05, + "loss": 2.3762, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 15.666613578796387, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.3249, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 14.475164413452148, + "learning_rate": 1.892857142857143e-05, + "loss": 2.3317, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 16.231687545776367, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.5064, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 16.8968563079834, + "learning_rate": 1.88265306122449e-05, + "loss": 2.3932, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 17.74305534362793, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.3329, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 16.41620445251465, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.3982, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 17.965959548950195, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.4227, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 19.92589569091797, + "learning_rate": 1.862244897959184e-05, + "loss": 2.5255, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 20.62932586669922, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.1816, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 18.360614776611328, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.2827, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 19.199546813964844, + "learning_rate": 1.8469387755102043e-05, + "loss": 2.1498, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 22.727521896362305, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.3811, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 19.80649757385254, + "learning_rate": 1.836734693877551e-05, + "loss": 2.2342, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.24563217163086, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.2287, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 25.384042739868164, + "learning_rate": 1.826530612244898e-05, + "loss": 2.1259, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 23.417089462280273, + "learning_rate": 1.8214285714285715e-05, + "loss": 2.0858, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 27.639497756958008, + "learning_rate": 1.816326530612245e-05, + "loss": 2.2243, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 27.390850067138672, + "learning_rate": 1.8112244897959187e-05, + "loss": 2.1314, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 27.956937789916992, + "learning_rate": 1.806122448979592e-05, + "loss": 2.1755, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 32.09632873535156, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.2365, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 33.84647750854492, + "learning_rate": 1.795918367346939e-05, + "loss": 2.1671, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 32.027130126953125, + "learning_rate": 1.7908163265306123e-05, + "loss": 2.09, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 35.423587799072266, + "learning_rate": 1.785714285714286e-05, + "loss": 2.2479, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 31.041240692138672, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9687, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 28.790103912353516, + "learning_rate": 1.7755102040816327e-05, + "loss": 2.1428, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.089313507080078, + "learning_rate": 1.7704081632653062e-05, + "loss": 2.0673, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 26.493867874145508, + "learning_rate": 1.7653061224489798e-05, + "loss": 2.0814, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 19.993173599243164, + "learning_rate": 1.760204081632653e-05, + "loss": 2.005, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 21.89765167236328, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.2262, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 23.22844123840332, + "learning_rate": 1.7500000000000002e-05, + "loss": 2.0208, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 15.864526748657227, + "learning_rate": 1.7448979591836738e-05, + "loss": 2.0153, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 21.451187133789062, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.1386, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 18.089811325073242, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.9517, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 24.029157638549805, + "learning_rate": 1.729591836734694e-05, + "loss": 1.9719, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 18.722776412963867, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0623, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 20.211933135986328, + "learning_rate": 1.719387755102041e-05, + "loss": 2.0081, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 17.61188507080078, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.8484, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 20.118955612182617, + "learning_rate": 1.7091836734693878e-05, + "loss": 2.0799, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 17.271841049194336, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.9832, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 19.521392822265625, + "learning_rate": 1.698979591836735e-05, + "loss": 1.9129, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 22.660900115966797, + "learning_rate": 1.6938775510204085e-05, + "loss": 2.118, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 17.332427978515625, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.9632, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 22.42765998840332, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.954, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 23.6208553314209, + "learning_rate": 1.678571428571429e-05, + "loss": 1.9917, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 19.78505516052246, + "learning_rate": 1.673469387755102e-05, + "loss": 1.7964, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 19.453041076660156, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.9587, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 24.731407165527344, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.9945, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 20.977611541748047, + "learning_rate": 1.6581632653061225e-05, + "loss": 2.0617, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 22.959585189819336, + "learning_rate": 1.653061224489796e-05, + "loss": 1.98, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 21.952653884887695, + "learning_rate": 1.6479591836734696e-05, + "loss": 2.1094, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 22.320383071899414, + "learning_rate": 1.642857142857143e-05, + "loss": 1.8418, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 24.375411987304688, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.8428, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 19.64323616027832, + "learning_rate": 1.63265306122449e-05, + "loss": 1.9194, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 22.459064483642578, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.6649, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 36.789764404296875, + "learning_rate": 1.6224489795918368e-05, + "loss": 2.0131, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 22.109119415283203, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.9603, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 19.196834564208984, + "learning_rate": 1.612244897959184e-05, + "loss": 2.0538, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 26.870800018310547, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.9168, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 35.190696716308594, + "learning_rate": 1.6020408163265308e-05, + "loss": 2.0149, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 19.963472366333008, + "learning_rate": 1.596938775510204e-05, + "loss": 1.7871, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 20.292407989501953, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.944, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 20.55329132080078, + "learning_rate": 1.586734693877551e-05, + "loss": 2.0175, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 17.27350616455078, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.9308, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 22.471134185791016, + "learning_rate": 1.576530612244898e-05, + "loss": 1.9221, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 25.098316192626953, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.9359, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 25.125213623046875, + "learning_rate": 1.566326530612245e-05, + "loss": 2.0087, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 20.038599014282227, + "learning_rate": 1.5612244897959187e-05, + "loss": 2.0939, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 19.016841888427734, + "learning_rate": 1.556122448979592e-05, + "loss": 2.0183, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 21.97820472717285, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.8239, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 25.578901290893555, + "learning_rate": 1.545918367346939e-05, + "loss": 1.9388, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 23.74614143371582, + "learning_rate": 1.5408163265306123e-05, + "loss": 2.0492, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 22.203304290771484, + "learning_rate": 1.535714285714286e-05, + "loss": 1.941, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 21.39324188232422, + "learning_rate": 1.530612244897959e-05, + "loss": 1.9042, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 18.99315643310547, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.88, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 24.22341537475586, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.8147, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.8966256380081177, + "eval_runtime": 6.9787, + "eval_samples_per_second": 101.022, + "eval_steps_per_second": 50.583, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 18.296152114868164, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.8605, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 26.404766082763672, + "learning_rate": 1.510204081632653e-05, + "loss": 2.1195, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 19.187122344970703, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.9284, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 20.79934310913086, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.725, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 23.833288192749023, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.9215, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 22.301727294921875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.8728, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 23.685596466064453, + "learning_rate": 1.4846938775510204e-05, + "loss": 2.0482, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 18.969186782836914, + "learning_rate": 1.479591836734694e-05, + "loss": 1.8724, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 23.994483947753906, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.9542, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 16.84621238708496, + "learning_rate": 1.469387755102041e-05, + "loss": 1.9703, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 23.411087036132812, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.9836, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 29.55487632751465, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.9124, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 32.28921127319336, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.8566, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 24.007558822631836, + "learning_rate": 1.448979591836735e-05, + "loss": 1.8296, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 26.753524780273438, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.7181, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 22.49270248413086, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.8741, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 28.006656646728516, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.9151, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 17.606775283813477, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.9654, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 29.94802474975586, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.8849, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 28.27743148803711, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.8006, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 19.11652183532715, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.8539, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 24.255807876586914, + "learning_rate": 1.4081632653061225e-05, + "loss": 2.0162, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 22.508352279663086, + "learning_rate": 1.403061224489796e-05, + "loss": 1.858, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 27.028772354125977, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.8175, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 22.697704315185547, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.9789, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 31.604068756103516, + "learning_rate": 1.3877551020408165e-05, + "loss": 2.0039, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 27.71053695678711, + "learning_rate": 1.38265306122449e-05, + "loss": 1.9867, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 17.37586784362793, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.6931, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 20.28536605834961, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.9199, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 29.4377384185791, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.9322, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 17.703046798706055, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.8842, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 30.14008140563965, + "learning_rate": 1.3571428571428574e-05, + "loss": 2.1228, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 29.657262802124023, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.8929, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 18.243854522705078, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.8811, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 33.22247314453125, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.9814, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 26.413856506347656, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.9329, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 20.56089210510254, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.8944, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 19.480737686157227, + "learning_rate": 1.326530612244898e-05, + "loss": 1.9221, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 22.788074493408203, + "learning_rate": 1.3214285714285716e-05, + "loss": 2.0445, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 20.722291946411133, + "learning_rate": 1.316326530612245e-05, + "loss": 1.9998, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 25.190189361572266, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.8076, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 23.203886032104492, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.7821, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 25.32374382019043, + "learning_rate": 1.3010204081632653e-05, + "loss": 2.0356, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 28.798864364624023, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.8202, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 24.93810272216797, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.9237, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 36.78353500366211, + "learning_rate": 1.2857142857142859e-05, + "loss": 2.0019, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 28.510663986206055, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.9268, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 38.19087219238281, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.9366, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 20.796728134155273, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.8731, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 23.036758422851562, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.8835, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 27.058195114135742, + "learning_rate": 1.260204081632653e-05, + "loss": 1.8013, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 25.390460968017578, + "learning_rate": 1.2551020408163267e-05, + "loss": 2.0623, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 27.993654251098633, + "learning_rate": 1.25e-05, + "loss": 1.6895, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 24.15807342529297, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.9799, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 24.369815826416016, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.9687, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 24.572988510131836, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.8607, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 20.491390228271484, + "learning_rate": 1.229591836734694e-05, + "loss": 2.0677, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 25.128101348876953, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.9233, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 18.843276977539062, + "learning_rate": 1.219387755102041e-05, + "loss": 1.781, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 24.99994659423828, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.962, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 20.679218292236328, + "learning_rate": 1.2091836734693878e-05, + "loss": 2.0055, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 26.00550651550293, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.9761, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 33.80900192260742, + "learning_rate": 1.1989795918367348e-05, + "loss": 2.0502, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 25.639009475708008, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.9088, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 17.48627471923828, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.9359, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 23.16074562072754, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.8647, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 25.39946174621582, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.8523, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 25.8050537109375, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.8403, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 20.019033432006836, + "learning_rate": 1.1683673469387755e-05, + "loss": 2.0023, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 26.194847106933594, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.9429, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 21.064212799072266, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.8302, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 21.876129150390625, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.8881, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 33.61103439331055, + "learning_rate": 1.1479591836734697e-05, + "loss": 2.0497, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 27.204744338989258, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.8431, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 21.605751037597656, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.9149, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 30.307472229003906, + "learning_rate": 1.1326530612244899e-05, + "loss": 2.0313, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 23.69244384765625, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.8676, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 25.619901657104492, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.7905, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 28.0296573638916, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.8567, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 36.4359130859375, + "learning_rate": 1.1122448979591838e-05, + "loss": 2.115, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 26.91726303100586, + "learning_rate": 1.1071428571428572e-05, + "loss": 2.0642, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 23.085880279541016, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.9109, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 27.870641708374023, + "learning_rate": 1.096938775510204e-05, + "loss": 1.8999, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 32.0672607421875, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.904, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 28.879365921020508, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.7159, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 27.592771530151367, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.8561, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 27.412763595581055, + "learning_rate": 1.076530612244898e-05, + "loss": 1.9282, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 30.12356185913086, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.8726, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 39.9027214050293, + "learning_rate": 1.066326530612245e-05, + "loss": 1.7551, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 30.483945846557617, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.7643, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 26.00415802001953, + "learning_rate": 1.0561224489795918e-05, + "loss": 2.0552, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 23.03282356262207, + "learning_rate": 1.0510204081632654e-05, + "loss": 2.0052, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 33.653221130371094, + "learning_rate": 1.045918367346939e-05, + "loss": 1.7367, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 39.59351348876953, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.9726, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.77714920043945, + "learning_rate": 1.0357142857142859e-05, + "loss": 2.0906, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 33.194549560546875, + "learning_rate": 1.0306122448979591e-05, + "loss": 2.0742, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 25.10793685913086, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.9204, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 40.048404693603516, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.7775, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 26.085933685302734, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.9459, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 18.375, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.9536, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.8626214265823364, + "eval_runtime": 6.6508, + "eval_samples_per_second": 106.002, + "eval_steps_per_second": 53.076, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 33.858341217041016, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.8191, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 22.895992279052734, + "learning_rate": 1e-05, + "loss": 2.0596, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 30.55072593688965, + "learning_rate": 9.948979591836737e-06, + "loss": 1.8904, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 26.542705535888672, + "learning_rate": 9.89795918367347e-06, + "loss": 1.9683, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 39.81034851074219, + "learning_rate": 9.846938775510205e-06, + "loss": 1.9726, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 22.0065860748291, + "learning_rate": 9.795918367346939e-06, + "loss": 2.0575, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 19.012041091918945, + "learning_rate": 9.744897959183674e-06, + "loss": 1.8431, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 39.699974060058594, + "learning_rate": 9.693877551020408e-06, + "loss": 1.8231, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 21.391319274902344, + "learning_rate": 9.642857142857144e-06, + "loss": 1.7939, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 25.8063907623291, + "learning_rate": 9.591836734693878e-06, + "loss": 2.0366, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 20.598569869995117, + "learning_rate": 9.540816326530612e-06, + "loss": 1.8323, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 29.391401290893555, + "learning_rate": 9.489795918367348e-06, + "loss": 2.0052, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 24.39499855041504, + "learning_rate": 9.438775510204082e-06, + "loss": 1.8461, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 24.16887092590332, + "learning_rate": 9.387755102040818e-06, + "loss": 1.9404, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 24.577871322631836, + "learning_rate": 9.336734693877552e-06, + "loss": 1.9202, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 26.117361068725586, + "learning_rate": 9.285714285714288e-06, + "loss": 1.921, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 22.586837768554688, + "learning_rate": 9.234693877551022e-06, + "loss": 1.9692, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 18.438722610473633, + "learning_rate": 9.183673469387756e-06, + "loss": 1.9496, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 22.94545555114746, + "learning_rate": 9.13265306122449e-06, + "loss": 1.991, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 28.664562225341797, + "learning_rate": 9.081632653061225e-06, + "loss": 1.8352, + "step": 220 + }, + { + "epoch": 0.5573770491803278, + "grad_norm": 25.63576316833496, + "learning_rate": 9.03061224489796e-06, + "loss": 1.9399, + "step": 221 + }, + { + "epoch": 0.5598991172761665, + "grad_norm": 21.650251388549805, + "learning_rate": 8.979591836734695e-06, + "loss": 1.9565, + "step": 222 + }, + { + "epoch": 0.562421185372005, + "grad_norm": 29.605735778808594, + "learning_rate": 8.92857142857143e-06, + "loss": 1.729, + "step": 223 + }, + { + "epoch": 0.5649432534678437, + "grad_norm": 23.98230743408203, + "learning_rate": 8.877551020408163e-06, + "loss": 1.9399, + "step": 224 + }, + { + "epoch": 0.5674653215636822, + "grad_norm": 20.37510108947754, + "learning_rate": 8.826530612244899e-06, + "loss": 1.7322, + "step": 225 + }, + { + "epoch": 0.5699873896595208, + "grad_norm": 25.876188278198242, + "learning_rate": 8.775510204081633e-06, + "loss": 1.9444, + "step": 226 + }, + { + "epoch": 0.5725094577553594, + "grad_norm": 32.07249069213867, + "learning_rate": 8.724489795918369e-06, + "loss": 1.9364, + "step": 227 + }, + { + "epoch": 0.575031525851198, + "grad_norm": 28.014524459838867, + "learning_rate": 8.673469387755103e-06, + "loss": 1.757, + "step": 228 + }, + { + "epoch": 0.5775535939470365, + "grad_norm": 30.82647132873535, + "learning_rate": 8.622448979591837e-06, + "loss": 1.9067, + "step": 229 + }, + { + "epoch": 0.5800756620428752, + "grad_norm": 30.651660919189453, + "learning_rate": 8.571428571428571e-06, + "loss": 1.9906, + "step": 230 + }, + { + "epoch": 0.5825977301387137, + "grad_norm": 25.239904403686523, + "learning_rate": 8.520408163265307e-06, + "loss": 1.8914, + "step": 231 + }, + { + "epoch": 0.5851197982345523, + "grad_norm": 21.33747673034668, + "learning_rate": 8.469387755102042e-06, + "loss": 1.9999, + "step": 232 + }, + { + "epoch": 0.587641866330391, + "grad_norm": 25.255064010620117, + "learning_rate": 8.418367346938776e-06, + "loss": 1.8941, + "step": 233 + }, + { + "epoch": 0.5901639344262295, + "grad_norm": 24.443973541259766, + "learning_rate": 8.36734693877551e-06, + "loss": 1.7679, + "step": 234 + }, + { + "epoch": 0.592686002522068, + "grad_norm": 25.473894119262695, + "learning_rate": 8.316326530612246e-06, + "loss": 1.7876, + "step": 235 + }, + { + "epoch": 0.5952080706179067, + "grad_norm": 26.28467559814453, + "learning_rate": 8.26530612244898e-06, + "loss": 1.6761, + "step": 236 + }, + { + "epoch": 0.5977301387137453, + "grad_norm": 24.488052368164062, + "learning_rate": 8.214285714285714e-06, + "loss": 2.0022, + "step": 237 + }, + { + "epoch": 0.6002522068095839, + "grad_norm": 30.074064254760742, + "learning_rate": 8.16326530612245e-06, + "loss": 1.7747, + "step": 238 + }, + { + "epoch": 0.6027742749054225, + "grad_norm": 23.73440170288086, + "learning_rate": 8.112244897959184e-06, + "loss": 1.8468, + "step": 239 + }, + { + "epoch": 0.605296343001261, + "grad_norm": 22.338869094848633, + "learning_rate": 8.06122448979592e-06, + "loss": 1.8611, + "step": 240 + }, + { + "epoch": 0.6078184110970997, + "grad_norm": 24.844266891479492, + "learning_rate": 8.010204081632654e-06, + "loss": 1.9285, + "step": 241 + }, + { + "epoch": 0.6103404791929382, + "grad_norm": 29.65668487548828, + "learning_rate": 7.959183673469388e-06, + "loss": 1.935, + "step": 242 + }, + { + "epoch": 0.6128625472887768, + "grad_norm": 26.01723289489746, + "learning_rate": 7.908163265306124e-06, + "loss": 1.7587, + "step": 243 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 27.04817771911621, + "learning_rate": 7.857142857142858e-06, + "loss": 1.8878, + "step": 244 + }, + { + "epoch": 0.617906683480454, + "grad_norm": 36.23786163330078, + "learning_rate": 7.806122448979593e-06, + "loss": 1.992, + "step": 245 + }, + { + "epoch": 0.6204287515762925, + "grad_norm": 19.283294677734375, + "learning_rate": 7.755102040816327e-06, + "loss": 1.8066, + "step": 246 + }, + { + "epoch": 0.6229508196721312, + "grad_norm": 24.24143409729004, + "learning_rate": 7.704081632653061e-06, + "loss": 1.8899, + "step": 247 + }, + { + "epoch": 0.6254728877679697, + "grad_norm": 25.59832763671875, + "learning_rate": 7.653061224489796e-06, + "loss": 1.9601, + "step": 248 + }, + { + "epoch": 0.6279949558638083, + "grad_norm": 27.195640563964844, + "learning_rate": 7.602040816326531e-06, + "loss": 1.9561, + "step": 249 + }, + { + "epoch": 0.6305170239596469, + "grad_norm": 27.854570388793945, + "learning_rate": 7.551020408163265e-06, + "loss": 1.8781, + "step": 250 + }, + { + "epoch": 0.6330390920554855, + "grad_norm": 25.715761184692383, + "learning_rate": 7.500000000000001e-06, + "loss": 1.8542, + "step": 251 + }, + { + "epoch": 0.6355611601513241, + "grad_norm": 22.562984466552734, + "learning_rate": 7.448979591836736e-06, + "loss": 1.7681, + "step": 252 + }, + { + "epoch": 0.6380832282471627, + "grad_norm": 20.540348052978516, + "learning_rate": 7.39795918367347e-06, + "loss": 1.9617, + "step": 253 + }, + { + "epoch": 0.6406052963430012, + "grad_norm": 24.610937118530273, + "learning_rate": 7.346938775510205e-06, + "loss": 1.9694, + "step": 254 + }, + { + "epoch": 0.6431273644388399, + "grad_norm": 27.93538475036621, + "learning_rate": 7.295918367346939e-06, + "loss": 1.8858, + "step": 255 + }, + { + "epoch": 0.6456494325346784, + "grad_norm": 31.466445922851562, + "learning_rate": 7.244897959183675e-06, + "loss": 1.8252, + "step": 256 + }, + { + "epoch": 0.648171500630517, + "grad_norm": 26.276226043701172, + "learning_rate": 7.193877551020409e-06, + "loss": 1.8865, + "step": 257 + }, + { + "epoch": 0.6506935687263556, + "grad_norm": 22.52095603942871, + "learning_rate": 7.1428571428571436e-06, + "loss": 1.7649, + "step": 258 + }, + { + "epoch": 0.6532156368221942, + "grad_norm": 20.15144157409668, + "learning_rate": 7.091836734693878e-06, + "loss": 2.0158, + "step": 259 + }, + { + "epoch": 0.6557377049180327, + "grad_norm": 26.405349731445312, + "learning_rate": 7.0408163265306125e-06, + "loss": 1.8932, + "step": 260 + }, + { + "epoch": 0.6582597730138714, + "grad_norm": 32.94384765625, + "learning_rate": 6.989795918367348e-06, + "loss": 1.7795, + "step": 261 + }, + { + "epoch": 0.6607818411097099, + "grad_norm": 23.109092712402344, + "learning_rate": 6.938775510204082e-06, + "loss": 1.8383, + "step": 262 + }, + { + "epoch": 0.6633039092055486, + "grad_norm": 21.75737190246582, + "learning_rate": 6.887755102040817e-06, + "loss": 1.8727, + "step": 263 + }, + { + "epoch": 0.6658259773013872, + "grad_norm": 22.96916389465332, + "learning_rate": 6.836734693877551e-06, + "loss": 1.8544, + "step": 264 + }, + { + "epoch": 0.6683480453972257, + "grad_norm": 25.62445640563965, + "learning_rate": 6.785714285714287e-06, + "loss": 1.7503, + "step": 265 + }, + { + "epoch": 0.6708701134930644, + "grad_norm": 25.430530548095703, + "learning_rate": 6.734693877551021e-06, + "loss": 1.7938, + "step": 266 + }, + { + "epoch": 0.6733921815889029, + "grad_norm": 26.462881088256836, + "learning_rate": 6.683673469387756e-06, + "loss": 1.8284, + "step": 267 + }, + { + "epoch": 0.6759142496847415, + "grad_norm": 31.45004653930664, + "learning_rate": 6.63265306122449e-06, + "loss": 2.0328, + "step": 268 + }, + { + "epoch": 0.6784363177805801, + "grad_norm": 30.525737762451172, + "learning_rate": 6.581632653061225e-06, + "loss": 1.8192, + "step": 269 + }, + { + "epoch": 0.6809583858764187, + "grad_norm": 25.705707550048828, + "learning_rate": 6.530612244897959e-06, + "loss": 1.8533, + "step": 270 + }, + { + "epoch": 0.6834804539722572, + "grad_norm": 39.90187454223633, + "learning_rate": 6.4795918367346946e-06, + "loss": 1.9483, + "step": 271 + }, + { + "epoch": 0.6860025220680959, + "grad_norm": 28.0180721282959, + "learning_rate": 6.4285714285714295e-06, + "loss": 1.8132, + "step": 272 + }, + { + "epoch": 0.6885245901639344, + "grad_norm": 34.821372985839844, + "learning_rate": 6.3775510204081635e-06, + "loss": 1.9599, + "step": 273 + }, + { + "epoch": 0.691046658259773, + "grad_norm": 24.018394470214844, + "learning_rate": 6.326530612244899e-06, + "loss": 1.9248, + "step": 274 + }, + { + "epoch": 0.6935687263556116, + "grad_norm": 24.074344635009766, + "learning_rate": 6.275510204081633e-06, + "loss": 2.0148, + "step": 275 + }, + { + "epoch": 0.6960907944514502, + "grad_norm": 31.1939754486084, + "learning_rate": 6.224489795918368e-06, + "loss": 1.8959, + "step": 276 + }, + { + "epoch": 0.6986128625472888, + "grad_norm": 25.481502532958984, + "learning_rate": 6.173469387755102e-06, + "loss": 1.9832, + "step": 277 + }, + { + "epoch": 0.7011349306431274, + "grad_norm": 29.6664981842041, + "learning_rate": 6.122448979591837e-06, + "loss": 1.9222, + "step": 278 + }, + { + "epoch": 0.7036569987389659, + "grad_norm": 26.30698585510254, + "learning_rate": 6.071428571428571e-06, + "loss": 1.9897, + "step": 279 + }, + { + "epoch": 0.7061790668348046, + "grad_norm": 31.827558517456055, + "learning_rate": 6.020408163265307e-06, + "loss": 1.8615, + "step": 280 + }, + { + "epoch": 0.7087011349306431, + "grad_norm": 24.80223846435547, + "learning_rate": 5.969387755102042e-06, + "loss": 1.9579, + "step": 281 + }, + { + "epoch": 0.7112232030264817, + "grad_norm": 36.134700775146484, + "learning_rate": 5.918367346938776e-06, + "loss": 1.723, + "step": 282 + }, + { + "epoch": 0.7137452711223203, + "grad_norm": 30.388233184814453, + "learning_rate": 5.867346938775511e-06, + "loss": 1.9736, + "step": 283 + }, + { + "epoch": 0.7162673392181589, + "grad_norm": 32.231563568115234, + "learning_rate": 5.816326530612246e-06, + "loss": 1.9228, + "step": 284 + }, + { + "epoch": 0.7187894073139974, + "grad_norm": 38.05869674682617, + "learning_rate": 5.7653061224489805e-06, + "loss": 1.9159, + "step": 285 + }, + { + "epoch": 0.7213114754098361, + "grad_norm": 27.256147384643555, + "learning_rate": 5.7142857142857145e-06, + "loss": 1.8072, + "step": 286 + }, + { + "epoch": 0.7238335435056746, + "grad_norm": 25.67181396484375, + "learning_rate": 5.663265306122449e-06, + "loss": 1.8633, + "step": 287 + }, + { + "epoch": 0.7263556116015133, + "grad_norm": 31.8681697845459, + "learning_rate": 5.6122448979591834e-06, + "loss": 1.9822, + "step": 288 + }, + { + "epoch": 0.7288776796973518, + "grad_norm": 32.85325241088867, + "learning_rate": 5.561224489795919e-06, + "loss": 1.8166, + "step": 289 + }, + { + "epoch": 0.7313997477931904, + "grad_norm": 35.64312744140625, + "learning_rate": 5.510204081632653e-06, + "loss": 1.7166, + "step": 290 + }, + { + "epoch": 0.733921815889029, + "grad_norm": 24.276235580444336, + "learning_rate": 5.459183673469388e-06, + "loss": 1.7593, + "step": 291 + }, + { + "epoch": 0.7364438839848676, + "grad_norm": 29.371950149536133, + "learning_rate": 5.408163265306123e-06, + "loss": 1.8124, + "step": 292 + }, + { + "epoch": 0.7389659520807061, + "grad_norm": 23.76220703125, + "learning_rate": 5.357142857142857e-06, + "loss": 1.7775, + "step": 293 + }, + { + "epoch": 0.7414880201765448, + "grad_norm": 37.103050231933594, + "learning_rate": 5.306122448979593e-06, + "loss": 1.9253, + "step": 294 + }, + { + "epoch": 0.7440100882723834, + "grad_norm": 20.0811767578125, + "learning_rate": 5.255102040816327e-06, + "loss": 1.8711, + "step": 295 + }, + { + "epoch": 0.7465321563682219, + "grad_norm": 35.33123016357422, + "learning_rate": 5.204081632653062e-06, + "loss": 1.8764, + "step": 296 + }, + { + "epoch": 0.7490542244640606, + "grad_norm": 31.880672454833984, + "learning_rate": 5.153061224489796e-06, + "loss": 1.8929, + "step": 297 + }, + { + "epoch": 0.7515762925598991, + "grad_norm": 21.682334899902344, + "learning_rate": 5.1020408163265315e-06, + "loss": 2.0377, + "step": 298 + }, + { + "epoch": 0.7540983606557377, + "grad_norm": 34.68608474731445, + "learning_rate": 5.0510204081632655e-06, + "loss": 1.9341, + "step": 299 + }, + { + "epoch": 0.7566204287515763, + "grad_norm": 25.59632110595703, + "learning_rate": 5e-06, + "loss": 1.8264, + "step": 300 + }, + { + "epoch": 0.7566204287515763, + "eval_loss": 1.8502724170684814, + "eval_runtime": 6.6208, + "eval_samples_per_second": 106.483, + "eval_steps_per_second": 53.317, + "step": 300 + }, + { + "epoch": 0.7591424968474149, + "grad_norm": 33.780616760253906, + "learning_rate": 4.948979591836735e-06, + "loss": 1.8315, + "step": 301 + }, + { + "epoch": 0.7616645649432535, + "grad_norm": 23.005069732666016, + "learning_rate": 4.897959183673469e-06, + "loss": 1.8434, + "step": 302 + }, + { + "epoch": 0.7641866330390921, + "grad_norm": 27.338787078857422, + "learning_rate": 4.846938775510204e-06, + "loss": 1.9102, + "step": 303 + }, + { + "epoch": 0.7667087011349306, + "grad_norm": 26.87493133544922, + "learning_rate": 4.795918367346939e-06, + "loss": 1.8408, + "step": 304 + }, + { + "epoch": 0.7692307692307693, + "grad_norm": 33.59117126464844, + "learning_rate": 4.744897959183674e-06, + "loss": 1.8633, + "step": 305 + }, + { + "epoch": 0.7717528373266078, + "grad_norm": 38.98092269897461, + "learning_rate": 4.693877551020409e-06, + "loss": 1.8187, + "step": 306 + }, + { + "epoch": 0.7742749054224464, + "grad_norm": 28.7203369140625, + "learning_rate": 4.642857142857144e-06, + "loss": 1.8425, + "step": 307 + }, + { + "epoch": 0.776796973518285, + "grad_norm": 30.91414451599121, + "learning_rate": 4.591836734693878e-06, + "loss": 1.8526, + "step": 308 + }, + { + "epoch": 0.7793190416141236, + "grad_norm": 29.04154396057129, + "learning_rate": 4.540816326530613e-06, + "loss": 1.8913, + "step": 309 + }, + { + "epoch": 0.7818411097099621, + "grad_norm": 29.638099670410156, + "learning_rate": 4.489795918367348e-06, + "loss": 1.8736, + "step": 310 + }, + { + "epoch": 0.7843631778058008, + "grad_norm": 21.630523681640625, + "learning_rate": 4.438775510204082e-06, + "loss": 1.7407, + "step": 311 + }, + { + "epoch": 0.7868852459016393, + "grad_norm": 28.39365577697754, + "learning_rate": 4.3877551020408165e-06, + "loss": 1.7503, + "step": 312 + }, + { + "epoch": 0.7894073139974779, + "grad_norm": 30.86245346069336, + "learning_rate": 4.336734693877551e-06, + "loss": 1.885, + "step": 313 + }, + { + "epoch": 0.7919293820933165, + "grad_norm": 20.29497718811035, + "learning_rate": 4.2857142857142855e-06, + "loss": 1.8725, + "step": 314 + }, + { + "epoch": 0.7944514501891551, + "grad_norm": 20.04092025756836, + "learning_rate": 4.234693877551021e-06, + "loss": 1.949, + "step": 315 + }, + { + "epoch": 0.7969735182849937, + "grad_norm": 26.88593101501465, + "learning_rate": 4.183673469387755e-06, + "loss": 1.8807, + "step": 316 + }, + { + "epoch": 0.7994955863808323, + "grad_norm": 28.06767463684082, + "learning_rate": 4.13265306122449e-06, + "loss": 1.9135, + "step": 317 + }, + { + "epoch": 0.8020176544766708, + "grad_norm": 35.69569778442383, + "learning_rate": 4.081632653061225e-06, + "loss": 1.8257, + "step": 318 + }, + { + "epoch": 0.8045397225725095, + "grad_norm": 29.278770446777344, + "learning_rate": 4.03061224489796e-06, + "loss": 1.8459, + "step": 319 + }, + { + "epoch": 0.807061790668348, + "grad_norm": 28.043975830078125, + "learning_rate": 3.979591836734694e-06, + "loss": 1.8821, + "step": 320 + }, + { + "epoch": 0.8095838587641866, + "grad_norm": 24.060317993164062, + "learning_rate": 3.928571428571429e-06, + "loss": 1.853, + "step": 321 + }, + { + "epoch": 0.8121059268600253, + "grad_norm": 23.929243087768555, + "learning_rate": 3.877551020408164e-06, + "loss": 1.9249, + "step": 322 + }, + { + "epoch": 0.8146279949558638, + "grad_norm": 37.94782257080078, + "learning_rate": 3.826530612244898e-06, + "loss": 1.9666, + "step": 323 + }, + { + "epoch": 0.8171500630517023, + "grad_norm": 26.99836540222168, + "learning_rate": 3.7755102040816327e-06, + "loss": 1.8464, + "step": 324 + }, + { + "epoch": 0.819672131147541, + "grad_norm": 26.87177085876465, + "learning_rate": 3.724489795918368e-06, + "loss": 1.9178, + "step": 325 + }, + { + "epoch": 0.8221941992433796, + "grad_norm": 36.669212341308594, + "learning_rate": 3.6734693877551024e-06, + "loss": 1.8469, + "step": 326 + }, + { + "epoch": 0.8247162673392182, + "grad_norm": 26.681806564331055, + "learning_rate": 3.6224489795918373e-06, + "loss": 1.9433, + "step": 327 + }, + { + "epoch": 0.8272383354350568, + "grad_norm": 27.62560272216797, + "learning_rate": 3.5714285714285718e-06, + "loss": 1.8022, + "step": 328 + }, + { + "epoch": 0.8297604035308953, + "grad_norm": 31.047653198242188, + "learning_rate": 3.5204081632653062e-06, + "loss": 1.9952, + "step": 329 + }, + { + "epoch": 0.832282471626734, + "grad_norm": 22.605276107788086, + "learning_rate": 3.469387755102041e-06, + "loss": 1.8992, + "step": 330 + }, + { + "epoch": 0.8348045397225725, + "grad_norm": 27.210397720336914, + "learning_rate": 3.4183673469387756e-06, + "loss": 2.0905, + "step": 331 + }, + { + "epoch": 0.8373266078184111, + "grad_norm": 19.93979835510254, + "learning_rate": 3.3673469387755105e-06, + "loss": 1.8109, + "step": 332 + }, + { + "epoch": 0.8398486759142497, + "grad_norm": 28.999895095825195, + "learning_rate": 3.316326530612245e-06, + "loss": 1.8978, + "step": 333 + }, + { + "epoch": 0.8423707440100883, + "grad_norm": 28.620771408081055, + "learning_rate": 3.2653061224489794e-06, + "loss": 2.0414, + "step": 334 + }, + { + "epoch": 0.8448928121059268, + "grad_norm": 28.657215118408203, + "learning_rate": 3.2142857142857147e-06, + "loss": 1.8445, + "step": 335 + }, + { + "epoch": 0.8474148802017655, + "grad_norm": 33.65087127685547, + "learning_rate": 3.1632653061224496e-06, + "loss": 1.8559, + "step": 336 + }, + { + "epoch": 0.849936948297604, + "grad_norm": 28.926881790161133, + "learning_rate": 3.112244897959184e-06, + "loss": 1.9608, + "step": 337 + }, + { + "epoch": 0.8524590163934426, + "grad_norm": 24.56757926940918, + "learning_rate": 3.0612244897959185e-06, + "loss": 1.8948, + "step": 338 + }, + { + "epoch": 0.8549810844892812, + "grad_norm": 26.79437828063965, + "learning_rate": 3.0102040816326534e-06, + "loss": 1.7803, + "step": 339 + }, + { + "epoch": 0.8575031525851198, + "grad_norm": 22.516748428344727, + "learning_rate": 2.959183673469388e-06, + "loss": 1.9039, + "step": 340 + }, + { + "epoch": 0.8600252206809584, + "grad_norm": 28.779983520507812, + "learning_rate": 2.908163265306123e-06, + "loss": 1.9083, + "step": 341 + }, + { + "epoch": 0.862547288776797, + "grad_norm": 23.90164566040039, + "learning_rate": 2.8571428571428573e-06, + "loss": 1.7973, + "step": 342 + }, + { + "epoch": 0.8650693568726355, + "grad_norm": 25.098011016845703, + "learning_rate": 2.8061224489795917e-06, + "loss": 1.8596, + "step": 343 + }, + { + "epoch": 0.8675914249684742, + "grad_norm": 36.381954193115234, + "learning_rate": 2.7551020408163266e-06, + "loss": 1.8069, + "step": 344 + }, + { + "epoch": 0.8701134930643127, + "grad_norm": 33.54197311401367, + "learning_rate": 2.7040816326530615e-06, + "loss": 1.9138, + "step": 345 + }, + { + "epoch": 0.8726355611601513, + "grad_norm": 27.836380004882812, + "learning_rate": 2.6530612244897964e-06, + "loss": 1.8806, + "step": 346 + }, + { + "epoch": 0.8751576292559899, + "grad_norm": 29.386459350585938, + "learning_rate": 2.602040816326531e-06, + "loss": 1.9654, + "step": 347 + }, + { + "epoch": 0.8776796973518285, + "grad_norm": 25.446636199951172, + "learning_rate": 2.5510204081632657e-06, + "loss": 1.8723, + "step": 348 + }, + { + "epoch": 0.880201765447667, + "grad_norm": 23.149751663208008, + "learning_rate": 2.5e-06, + "loss": 1.7802, + "step": 349 + }, + { + "epoch": 0.8827238335435057, + "grad_norm": 24.944000244140625, + "learning_rate": 2.4489795918367347e-06, + "loss": 1.9328, + "step": 350 + }, + { + "epoch": 0.8852459016393442, + "grad_norm": 24.133886337280273, + "learning_rate": 2.3979591836734696e-06, + "loss": 1.8824, + "step": 351 + }, + { + "epoch": 0.8877679697351829, + "grad_norm": 29.368667602539062, + "learning_rate": 2.3469387755102044e-06, + "loss": 1.8283, + "step": 352 + }, + { + "epoch": 0.8902900378310215, + "grad_norm": 27.814085006713867, + "learning_rate": 2.295918367346939e-06, + "loss": 1.8401, + "step": 353 + }, + { + "epoch": 0.89281210592686, + "grad_norm": 36.647239685058594, + "learning_rate": 2.244897959183674e-06, + "loss": 1.9979, + "step": 354 + }, + { + "epoch": 0.8953341740226987, + "grad_norm": 30.776464462280273, + "learning_rate": 2.1938775510204083e-06, + "loss": 1.971, + "step": 355 + }, + { + "epoch": 0.8978562421185372, + "grad_norm": 21.568681716918945, + "learning_rate": 2.1428571428571427e-06, + "loss": 2.0157, + "step": 356 + }, + { + "epoch": 0.9003783102143758, + "grad_norm": 30.973060607910156, + "learning_rate": 2.0918367346938776e-06, + "loss": 1.9058, + "step": 357 + }, + { + "epoch": 0.9029003783102144, + "grad_norm": 25.841272354125977, + "learning_rate": 2.0408163265306125e-06, + "loss": 1.8946, + "step": 358 + }, + { + "epoch": 0.905422446406053, + "grad_norm": 23.440290451049805, + "learning_rate": 1.989795918367347e-06, + "loss": 1.9487, + "step": 359 + }, + { + "epoch": 0.9079445145018915, + "grad_norm": 25.26444435119629, + "learning_rate": 1.938775510204082e-06, + "loss": 1.7457, + "step": 360 + }, + { + "epoch": 0.9104665825977302, + "grad_norm": 26.824586868286133, + "learning_rate": 1.8877551020408163e-06, + "loss": 1.9297, + "step": 361 + }, + { + "epoch": 0.9129886506935687, + "grad_norm": 30.850406646728516, + "learning_rate": 1.8367346938775512e-06, + "loss": 2.0683, + "step": 362 + }, + { + "epoch": 0.9155107187894073, + "grad_norm": 32.09762191772461, + "learning_rate": 1.7857142857142859e-06, + "loss": 1.7749, + "step": 363 + }, + { + "epoch": 0.9180327868852459, + "grad_norm": 27.876068115234375, + "learning_rate": 1.7346938775510206e-06, + "loss": 1.883, + "step": 364 + }, + { + "epoch": 0.9205548549810845, + "grad_norm": 30.165185928344727, + "learning_rate": 1.6836734693877552e-06, + "loss": 1.9148, + "step": 365 + }, + { + "epoch": 0.9230769230769231, + "grad_norm": 25.353132247924805, + "learning_rate": 1.6326530612244897e-06, + "loss": 1.8784, + "step": 366 + }, + { + "epoch": 0.9255989911727617, + "grad_norm": 38.13972854614258, + "learning_rate": 1.5816326530612248e-06, + "loss": 1.808, + "step": 367 + }, + { + "epoch": 0.9281210592686002, + "grad_norm": 29.678409576416016, + "learning_rate": 1.5306122448979593e-06, + "loss": 1.882, + "step": 368 + }, + { + "epoch": 0.9306431273644389, + "grad_norm": 19.059682846069336, + "learning_rate": 1.479591836734694e-06, + "loss": 1.8964, + "step": 369 + }, + { + "epoch": 0.9331651954602774, + "grad_norm": 26.248071670532227, + "learning_rate": 1.4285714285714286e-06, + "loss": 1.8685, + "step": 370 + }, + { + "epoch": 0.935687263556116, + "grad_norm": 28.667518615722656, + "learning_rate": 1.3775510204081633e-06, + "loss": 1.947, + "step": 371 + }, + { + "epoch": 0.9382093316519546, + "grad_norm": 27.218080520629883, + "learning_rate": 1.3265306122448982e-06, + "loss": 1.9719, + "step": 372 + }, + { + "epoch": 0.9407313997477932, + "grad_norm": 36.85118865966797, + "learning_rate": 1.2755102040816329e-06, + "loss": 1.8174, + "step": 373 + }, + { + "epoch": 0.9432534678436317, + "grad_norm": 24.54947280883789, + "learning_rate": 1.2244897959183673e-06, + "loss": 1.9564, + "step": 374 + }, + { + "epoch": 0.9457755359394704, + "grad_norm": 23.265106201171875, + "learning_rate": 1.1734693877551022e-06, + "loss": 1.9667, + "step": 375 + }, + { + "epoch": 0.9482976040353089, + "grad_norm": 25.103755950927734, + "learning_rate": 1.122448979591837e-06, + "loss": 1.8938, + "step": 376 + }, + { + "epoch": 0.9508196721311475, + "grad_norm": 28.593093872070312, + "learning_rate": 1.0714285714285714e-06, + "loss": 2.1103, + "step": 377 + }, + { + "epoch": 0.9533417402269861, + "grad_norm": 24.21011734008789, + "learning_rate": 1.0204081632653063e-06, + "loss": 1.695, + "step": 378 + }, + { + "epoch": 0.9558638083228247, + "grad_norm": 31.19601821899414, + "learning_rate": 9.69387755102041e-07, + "loss": 1.9795, + "step": 379 + }, + { + "epoch": 0.9583858764186634, + "grad_norm": 31.64060401916504, + "learning_rate": 9.183673469387756e-07, + "loss": 1.8726, + "step": 380 + }, + { + "epoch": 0.9609079445145019, + "grad_norm": 26.76803207397461, + "learning_rate": 8.673469387755103e-07, + "loss": 1.9005, + "step": 381 + }, + { + "epoch": 0.9634300126103404, + "grad_norm": 29.592342376708984, + "learning_rate": 8.163265306122449e-07, + "loss": 1.8033, + "step": 382 + }, + { + "epoch": 0.9659520807061791, + "grad_norm": 29.41642951965332, + "learning_rate": 7.653061224489796e-07, + "loss": 2.0594, + "step": 383 + }, + { + "epoch": 0.9684741488020177, + "grad_norm": 17.477853775024414, + "learning_rate": 7.142857142857143e-07, + "loss": 1.8088, + "step": 384 + }, + { + "epoch": 0.9709962168978562, + "grad_norm": 31.39056396484375, + "learning_rate": 6.632653061224491e-07, + "loss": 1.8812, + "step": 385 + }, + { + "epoch": 0.9735182849936949, + "grad_norm": 34.46897888183594, + "learning_rate": 6.122448979591837e-07, + "loss": 1.9868, + "step": 386 + }, + { + "epoch": 0.9760403530895334, + "grad_norm": 29.819665908813477, + "learning_rate": 5.612244897959184e-07, + "loss": 1.8238, + "step": 387 + }, + { + "epoch": 0.978562421185372, + "grad_norm": 29.0477237701416, + "learning_rate": 5.102040816326531e-07, + "loss": 1.8195, + "step": 388 + }, + { + "epoch": 0.9810844892812106, + "grad_norm": 26.244121551513672, + "learning_rate": 4.591836734693878e-07, + "loss": 1.9176, + "step": 389 + }, + { + "epoch": 0.9836065573770492, + "grad_norm": 29.151588439941406, + "learning_rate": 4.0816326530612243e-07, + "loss": 1.9222, + "step": 390 + }, + { + "epoch": 0.9861286254728878, + "grad_norm": 25.87322235107422, + "learning_rate": 3.5714285714285716e-07, + "loss": 1.9176, + "step": 391 + }, + { + "epoch": 0.9886506935687264, + "grad_norm": 36.852996826171875, + "learning_rate": 3.0612244897959183e-07, + "loss": 1.816, + "step": 392 + }, + { + "epoch": 0.9911727616645649, + "grad_norm": 29.98614501953125, + "learning_rate": 2.5510204081632656e-07, + "loss": 1.9229, + "step": 393 + }, + { + "epoch": 0.9936948297604036, + "grad_norm": 42.172603607177734, + "learning_rate": 2.0408163265306121e-07, + "loss": 1.9613, + "step": 394 + }, + { + "epoch": 0.9962168978562421, + "grad_norm": 35.004024505615234, + "learning_rate": 1.5306122448979592e-07, + "loss": 1.9607, + "step": 395 + }, + { + "epoch": 0.9987389659520807, + "grad_norm": 27.764902114868164, + "learning_rate": 1.0204081632653061e-07, + "loss": 1.834, + "step": 396 + }, + { + "epoch": 1.0, + "grad_norm": 37.669044494628906, + "learning_rate": 5.1020408163265303e-08, + "loss": 1.7949, + "step": 397 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 5796115339886592.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-397/training_args.bin b/checkpoints/checkpoint-397/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..866f8d7e39db21daf30a11de7c14a3ccb8f2e9aa --- /dev/null +++ b/checkpoints/checkpoint-397/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:591380c2089627b1aa728bfd52abdb50afeaafc7a7f48353dede4e443fc370d0 +size 5969 diff --git a/checkpoints/checkpoint-40/README.md b/checkpoints/checkpoint-40/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b974a935220f493e52a3da346206a652479b4735 --- /dev/null +++ b/checkpoints/checkpoint-40/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.2-1B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-40/adapter_config.json b/checkpoints/checkpoint-40/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a0d31afa2216e2d6d2237fc15eef0c8c03ca674 --- /dev/null +++ b/checkpoints/checkpoint-40/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.2-1B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-40/adapter_model.safetensors b/checkpoints/checkpoint-40/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c99f5e16c8deae2d209db62a06d4c01fbb617309 --- /dev/null +++ b/checkpoints/checkpoint-40/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c2d6037d0ab6edc2f7df889ca8021320243255b81b97f772213081ce585f36c +size 41248 diff --git a/checkpoints/checkpoint-40/chat_template.jinja b/checkpoints/checkpoint-40/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..1bad6a0f648dccdbec523ca79ba90fbcfc806af0 --- /dev/null +++ b/checkpoints/checkpoint-40/chat_template.jinja @@ -0,0 +1,93 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- if strftime_now is defined %} + {%- set date_string = strftime_now("%d %b %Y") %} + {%- else %} + {%- set date_string = "26 Jul 2024" %} + {%- endif %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {{- "<|eot_id|>" }} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-40/optimizer.pt b/checkpoints/checkpoint-40/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..88176e17a946158d4f28d0968e7cd84a8626b558 --- /dev/null +++ b/checkpoints/checkpoint-40/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d73f75fe493d0e700e244d9cd0986bacbed1b14e759355890ad082cd360c06d5 +size 38889 diff --git a/checkpoints/checkpoint-40/rng_state.pth b/checkpoints/checkpoint-40/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..449028c1ecc80657ae434a40b29f1f2bdccb9195 --- /dev/null +++ b/checkpoints/checkpoint-40/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12e15e837284f30841feeb4cb11a4ca47e6e0a0d43907e64044c865959176390 +size 14581 diff --git a/checkpoints/checkpoint-40/scheduler.pt b/checkpoints/checkpoint-40/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..1b1f64a206ecdaf12302f8c61fbdfcdf3d401770 --- /dev/null +++ b/checkpoints/checkpoint-40/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee7320820b24f6e9e145917287113edf73d343aa5dc43801b0de920e78fe1d07 +size 1465 diff --git a/checkpoints/checkpoint-40/special_tokens_map.json b/checkpoints/checkpoint-40/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-40/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-40/tokenizer.json b/checkpoints/checkpoint-40/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-40/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-40/tokenizer_config.json b/checkpoints/checkpoint-40/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-40/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-40/trainer_state.json b/checkpoints/checkpoint-40/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..5b4ef26437c18c68e6aab6e28c00d413520567b2 --- /dev/null +++ b/checkpoints/checkpoint-40/trainer_state.json @@ -0,0 +1,314 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.1008827238335435, + "eval_steps": 100, + "global_step": 40, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 25.350475311279297, + "learning_rate": 0.0, + "loss": 2.5568, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 24.538068771362305, + "learning_rate": 4.000000000000001e-06, + "loss": 2.7748, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 23.780784606933594, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5911, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 24.780380249023438, + "learning_rate": 1.2e-05, + "loss": 2.8427, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 22.699949264526367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.709, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 22.106008529663086, + "learning_rate": 2e-05, + "loss": 2.5854, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 22.497045516967773, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.5427, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 27.103275299072266, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.6599, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 21.081985473632812, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.5145, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 25.964981079101562, + "learning_rate": 1.979591836734694e-05, + "loss": 2.4247, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 25.353195190429688, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.5092, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 18.94191551208496, + "learning_rate": 1.969387755102041e-05, + "loss": 2.4335, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 23.60140037536621, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.544, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 24.298965454101562, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.4987, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.745506286621094, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.4985, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 22.54330062866211, + "learning_rate": 1.948979591836735e-05, + "loss": 2.6892, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 21.46229362487793, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.3998, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 20.54530143737793, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.4244, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 18.8839111328125, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.3911, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.924652099609375, + "learning_rate": 1.928571428571429e-05, + "loss": 2.3588, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.996627807617188, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.3727, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 18.584613800048828, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2974, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 14.309200286865234, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.4843, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.074164390563965, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.4043, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 13.610542297363281, + "learning_rate": 1.903061224489796e-05, + "loss": 2.3762, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 15.666613578796387, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.3249, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 14.475164413452148, + "learning_rate": 1.892857142857143e-05, + "loss": 2.3317, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 16.231687545776367, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.5064, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 16.8968563079834, + "learning_rate": 1.88265306122449e-05, + "loss": 2.3932, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 17.74305534362793, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.3329, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 16.41620445251465, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.3982, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 17.965959548950195, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.4227, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 19.92589569091797, + "learning_rate": 1.862244897959184e-05, + "loss": 2.5255, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 20.62932586669922, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.1816, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 18.360614776611328, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.2827, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 19.199546813964844, + "learning_rate": 1.8469387755102043e-05, + "loss": 2.1498, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 22.727521896362305, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.3811, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 19.80649757385254, + "learning_rate": 1.836734693877551e-05, + "loss": 2.2342, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.24563217163086, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.2287, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 25.384042739868164, + "learning_rate": 1.826530612244898e-05, + "loss": 2.1259, + "step": 40 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 578673800527872.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-40/training_args.bin b/checkpoints/checkpoint-40/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..866f8d7e39db21daf30a11de7c14a3ccb8f2e9aa --- /dev/null +++ b/checkpoints/checkpoint-40/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:591380c2089627b1aa728bfd52abdb50afeaafc7a7f48353dede4e443fc370d0 +size 5969 diff --git a/checkpoints/checkpoint-50/README.md b/checkpoints/checkpoint-50/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b974a935220f493e52a3da346206a652479b4735 --- /dev/null +++ b/checkpoints/checkpoint-50/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.2-1B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-50/adapter_config.json b/checkpoints/checkpoint-50/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a0d31afa2216e2d6d2237fc15eef0c8c03ca674 --- /dev/null +++ b/checkpoints/checkpoint-50/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.2-1B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-50/adapter_model.safetensors b/checkpoints/checkpoint-50/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..84db9f2978a33c47e188b05dcaeb6e8fdfb4dd0a --- /dev/null +++ b/checkpoints/checkpoint-50/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71c36574ab899a8775284ff0f33f37f9715fc0ba3072916bd9a23f0f2ecfea05 +size 41248 diff --git a/checkpoints/checkpoint-50/chat_template.jinja b/checkpoints/checkpoint-50/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..1bad6a0f648dccdbec523ca79ba90fbcfc806af0 --- /dev/null +++ b/checkpoints/checkpoint-50/chat_template.jinja @@ -0,0 +1,93 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- if strftime_now is defined %} + {%- set date_string = strftime_now("%d %b %Y") %} + {%- else %} + {%- set date_string = "26 Jul 2024" %} + {%- endif %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {{- "<|eot_id|>" }} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-50/optimizer.pt b/checkpoints/checkpoint-50/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..eab3dee69061ede2ea626ad0510796e02f9d1b84 --- /dev/null +++ b/checkpoints/checkpoint-50/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de8cb935c5ed3d1a91a550683706d734107cbb72401b6abb12e53d0477626149 +size 38889 diff --git a/checkpoints/checkpoint-50/rng_state.pth b/checkpoints/checkpoint-50/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..449028c1ecc80657ae434a40b29f1f2bdccb9195 --- /dev/null +++ b/checkpoints/checkpoint-50/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12e15e837284f30841feeb4cb11a4ca47e6e0a0d43907e64044c865959176390 +size 14581 diff --git a/checkpoints/checkpoint-50/scheduler.pt b/checkpoints/checkpoint-50/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..9a7d6934a47813cf67df98390376c6e7782e2629 --- /dev/null +++ b/checkpoints/checkpoint-50/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39dbe5d688e2941da9b24366bdc398946537cb9fbf130677019ec7d7c46b6d9c +size 1465 diff --git a/checkpoints/checkpoint-50/special_tokens_map.json b/checkpoints/checkpoint-50/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-50/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-50/tokenizer.json b/checkpoints/checkpoint-50/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-50/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-50/tokenizer_config.json b/checkpoints/checkpoint-50/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-50/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-50/trainer_state.json b/checkpoints/checkpoint-50/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..4d9b55b673cc27e1c292f365b01d22749072cb9c --- /dev/null +++ b/checkpoints/checkpoint-50/trainer_state.json @@ -0,0 +1,384 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.12610340479192939, + "eval_steps": 100, + "global_step": 50, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 25.350475311279297, + "learning_rate": 0.0, + "loss": 2.5568, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 24.538068771362305, + "learning_rate": 4.000000000000001e-06, + "loss": 2.7748, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 23.780784606933594, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5911, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 24.780380249023438, + "learning_rate": 1.2e-05, + "loss": 2.8427, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 22.699949264526367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.709, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 22.106008529663086, + "learning_rate": 2e-05, + "loss": 2.5854, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 22.497045516967773, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.5427, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 27.103275299072266, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.6599, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 21.081985473632812, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.5145, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 25.964981079101562, + "learning_rate": 1.979591836734694e-05, + "loss": 2.4247, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 25.353195190429688, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.5092, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 18.94191551208496, + "learning_rate": 1.969387755102041e-05, + "loss": 2.4335, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 23.60140037536621, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.544, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 24.298965454101562, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.4987, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.745506286621094, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.4985, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 22.54330062866211, + "learning_rate": 1.948979591836735e-05, + "loss": 2.6892, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 21.46229362487793, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.3998, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 20.54530143737793, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.4244, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 18.8839111328125, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.3911, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.924652099609375, + "learning_rate": 1.928571428571429e-05, + "loss": 2.3588, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.996627807617188, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.3727, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 18.584613800048828, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2974, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 14.309200286865234, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.4843, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.074164390563965, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.4043, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 13.610542297363281, + "learning_rate": 1.903061224489796e-05, + "loss": 2.3762, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 15.666613578796387, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.3249, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 14.475164413452148, + "learning_rate": 1.892857142857143e-05, + "loss": 2.3317, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 16.231687545776367, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.5064, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 16.8968563079834, + "learning_rate": 1.88265306122449e-05, + "loss": 2.3932, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 17.74305534362793, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.3329, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 16.41620445251465, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.3982, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 17.965959548950195, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.4227, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 19.92589569091797, + "learning_rate": 1.862244897959184e-05, + "loss": 2.5255, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 20.62932586669922, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.1816, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 18.360614776611328, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.2827, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 19.199546813964844, + "learning_rate": 1.8469387755102043e-05, + "loss": 2.1498, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 22.727521896362305, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.3811, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 19.80649757385254, + "learning_rate": 1.836734693877551e-05, + "loss": 2.2342, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.24563217163086, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.2287, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 25.384042739868164, + "learning_rate": 1.826530612244898e-05, + "loss": 2.1259, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 23.417089462280273, + "learning_rate": 1.8214285714285715e-05, + "loss": 2.0858, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 27.639497756958008, + "learning_rate": 1.816326530612245e-05, + "loss": 2.2243, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 27.390850067138672, + "learning_rate": 1.8112244897959187e-05, + "loss": 2.1314, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 27.956937789916992, + "learning_rate": 1.806122448979592e-05, + "loss": 2.1755, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 32.09632873535156, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.2365, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 33.84647750854492, + "learning_rate": 1.795918367346939e-05, + "loss": 2.1671, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 32.027130126953125, + "learning_rate": 1.7908163265306123e-05, + "loss": 2.09, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 35.423587799072266, + "learning_rate": 1.785714285714286e-05, + "loss": 2.2479, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 31.041240692138672, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9687, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 28.790103912353516, + "learning_rate": 1.7755102040816327e-05, + "loss": 2.1428, + "step": 50 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 724472085184512.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-50/training_args.bin b/checkpoints/checkpoint-50/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..866f8d7e39db21daf30a11de7c14a3ccb8f2e9aa --- /dev/null +++ b/checkpoints/checkpoint-50/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:591380c2089627b1aa728bfd52abdb50afeaafc7a7f48353dede4e443fc370d0 +size 5969 diff --git a/checkpoints/checkpoint-60/README.md b/checkpoints/checkpoint-60/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b974a935220f493e52a3da346206a652479b4735 --- /dev/null +++ b/checkpoints/checkpoint-60/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.2-1B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-60/adapter_config.json b/checkpoints/checkpoint-60/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a0d31afa2216e2d6d2237fc15eef0c8c03ca674 --- /dev/null +++ b/checkpoints/checkpoint-60/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.2-1B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-60/adapter_model.safetensors b/checkpoints/checkpoint-60/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ff83a38569ff18f37870c27d55d8bc06b6100072 --- /dev/null +++ b/checkpoints/checkpoint-60/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11a9c57eb6b2c2ae720d6f677b5180001590539a7f975a45d8acdd6ed5452139 +size 41248 diff --git a/checkpoints/checkpoint-60/chat_template.jinja b/checkpoints/checkpoint-60/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..1bad6a0f648dccdbec523ca79ba90fbcfc806af0 --- /dev/null +++ b/checkpoints/checkpoint-60/chat_template.jinja @@ -0,0 +1,93 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- if strftime_now is defined %} + {%- set date_string = strftime_now("%d %b %Y") %} + {%- else %} + {%- set date_string = "26 Jul 2024" %} + {%- endif %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {{- "<|eot_id|>" }} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-60/optimizer.pt b/checkpoints/checkpoint-60/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..64dd0d1e7a404ba1987c258459aa0eb48f71320a --- /dev/null +++ b/checkpoints/checkpoint-60/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bace3eb1673b88793b9a72eb80e3a6b00ca935c54dd84054a52ecd2f68a490d9 +size 38889 diff --git a/checkpoints/checkpoint-60/rng_state.pth b/checkpoints/checkpoint-60/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..449028c1ecc80657ae434a40b29f1f2bdccb9195 --- /dev/null +++ b/checkpoints/checkpoint-60/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12e15e837284f30841feeb4cb11a4ca47e6e0a0d43907e64044c865959176390 +size 14581 diff --git a/checkpoints/checkpoint-60/scheduler.pt b/checkpoints/checkpoint-60/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..923f00fac4bdd50e26ccfc6475f5b4094c9fefb4 --- /dev/null +++ b/checkpoints/checkpoint-60/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f5f48399c27701bd2074faaab5e8762b9db88f1ccf0640167a823b2052d7dba +size 1465 diff --git a/checkpoints/checkpoint-60/special_tokens_map.json b/checkpoints/checkpoint-60/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-60/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-60/tokenizer.json b/checkpoints/checkpoint-60/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-60/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-60/tokenizer_config.json b/checkpoints/checkpoint-60/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-60/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-60/trainer_state.json b/checkpoints/checkpoint-60/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d2f3fcfc0b97fcc0001c21db1809cd9039af2929 --- /dev/null +++ b/checkpoints/checkpoint-60/trainer_state.json @@ -0,0 +1,454 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.15132408575031525, + "eval_steps": 100, + "global_step": 60, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 25.350475311279297, + "learning_rate": 0.0, + "loss": 2.5568, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 24.538068771362305, + "learning_rate": 4.000000000000001e-06, + "loss": 2.7748, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 23.780784606933594, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5911, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 24.780380249023438, + "learning_rate": 1.2e-05, + "loss": 2.8427, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 22.699949264526367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.709, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 22.106008529663086, + "learning_rate": 2e-05, + "loss": 2.5854, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 22.497045516967773, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.5427, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 27.103275299072266, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.6599, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 21.081985473632812, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.5145, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 25.964981079101562, + "learning_rate": 1.979591836734694e-05, + "loss": 2.4247, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 25.353195190429688, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.5092, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 18.94191551208496, + "learning_rate": 1.969387755102041e-05, + "loss": 2.4335, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 23.60140037536621, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.544, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 24.298965454101562, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.4987, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.745506286621094, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.4985, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 22.54330062866211, + "learning_rate": 1.948979591836735e-05, + "loss": 2.6892, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 21.46229362487793, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.3998, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 20.54530143737793, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.4244, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 18.8839111328125, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.3911, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.924652099609375, + "learning_rate": 1.928571428571429e-05, + "loss": 2.3588, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.996627807617188, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.3727, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 18.584613800048828, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2974, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 14.309200286865234, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.4843, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.074164390563965, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.4043, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 13.610542297363281, + "learning_rate": 1.903061224489796e-05, + "loss": 2.3762, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 15.666613578796387, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.3249, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 14.475164413452148, + "learning_rate": 1.892857142857143e-05, + "loss": 2.3317, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 16.231687545776367, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.5064, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 16.8968563079834, + "learning_rate": 1.88265306122449e-05, + "loss": 2.3932, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 17.74305534362793, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.3329, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 16.41620445251465, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.3982, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 17.965959548950195, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.4227, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 19.92589569091797, + "learning_rate": 1.862244897959184e-05, + "loss": 2.5255, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 20.62932586669922, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.1816, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 18.360614776611328, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.2827, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 19.199546813964844, + "learning_rate": 1.8469387755102043e-05, + "loss": 2.1498, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 22.727521896362305, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.3811, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 19.80649757385254, + "learning_rate": 1.836734693877551e-05, + "loss": 2.2342, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.24563217163086, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.2287, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 25.384042739868164, + "learning_rate": 1.826530612244898e-05, + "loss": 2.1259, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 23.417089462280273, + "learning_rate": 1.8214285714285715e-05, + "loss": 2.0858, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 27.639497756958008, + "learning_rate": 1.816326530612245e-05, + "loss": 2.2243, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 27.390850067138672, + "learning_rate": 1.8112244897959187e-05, + "loss": 2.1314, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 27.956937789916992, + "learning_rate": 1.806122448979592e-05, + "loss": 2.1755, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 32.09632873535156, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.2365, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 33.84647750854492, + "learning_rate": 1.795918367346939e-05, + "loss": 2.1671, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 32.027130126953125, + "learning_rate": 1.7908163265306123e-05, + "loss": 2.09, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 35.423587799072266, + "learning_rate": 1.785714285714286e-05, + "loss": 2.2479, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 31.041240692138672, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9687, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 28.790103912353516, + "learning_rate": 1.7755102040816327e-05, + "loss": 2.1428, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.089313507080078, + "learning_rate": 1.7704081632653062e-05, + "loss": 2.0673, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 26.493867874145508, + "learning_rate": 1.7653061224489798e-05, + "loss": 2.0814, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 19.993173599243164, + "learning_rate": 1.760204081632653e-05, + "loss": 2.005, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 21.89765167236328, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.2262, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 23.22844123840332, + "learning_rate": 1.7500000000000002e-05, + "loss": 2.0208, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 15.864526748657227, + "learning_rate": 1.7448979591836738e-05, + "loss": 2.0153, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 21.451187133789062, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.1386, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 18.089811325073242, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.9517, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 24.029157638549805, + "learning_rate": 1.729591836734694e-05, + "loss": 1.9719, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 18.722776412963867, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0623, + "step": 60 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 871414801711104.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-60/training_args.bin b/checkpoints/checkpoint-60/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..866f8d7e39db21daf30a11de7c14a3ccb8f2e9aa --- /dev/null +++ b/checkpoints/checkpoint-60/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:591380c2089627b1aa728bfd52abdb50afeaafc7a7f48353dede4e443fc370d0 +size 5969 diff --git a/checkpoints/checkpoint-70/README.md b/checkpoints/checkpoint-70/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b974a935220f493e52a3da346206a652479b4735 --- /dev/null +++ b/checkpoints/checkpoint-70/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.2-1B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-70/adapter_config.json b/checkpoints/checkpoint-70/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a0d31afa2216e2d6d2237fc15eef0c8c03ca674 --- /dev/null +++ b/checkpoints/checkpoint-70/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.2-1B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-70/adapter_model.safetensors b/checkpoints/checkpoint-70/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ca6a7de5033b2183f1ed63cd019f28e57d2c697c --- /dev/null +++ b/checkpoints/checkpoint-70/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:653da30be1249a0d8ea0cc623650aeef3a67cbcd32b5fbc60d8f8ac9d0f7d64f +size 41248 diff --git a/checkpoints/checkpoint-70/chat_template.jinja b/checkpoints/checkpoint-70/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..1bad6a0f648dccdbec523ca79ba90fbcfc806af0 --- /dev/null +++ b/checkpoints/checkpoint-70/chat_template.jinja @@ -0,0 +1,93 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- if strftime_now is defined %} + {%- set date_string = strftime_now("%d %b %Y") %} + {%- else %} + {%- set date_string = "26 Jul 2024" %} + {%- endif %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {{- "<|eot_id|>" }} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-70/optimizer.pt b/checkpoints/checkpoint-70/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..56ea1612536d39ba242cf0c09bd5e037b6b399f4 --- /dev/null +++ b/checkpoints/checkpoint-70/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:482aa28e2debd103e4a8b2b15813c8d40fc2d331bb0b32a7a5ca9cd9c12c3935 +size 38889 diff --git a/checkpoints/checkpoint-70/rng_state.pth b/checkpoints/checkpoint-70/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..449028c1ecc80657ae434a40b29f1f2bdccb9195 --- /dev/null +++ b/checkpoints/checkpoint-70/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12e15e837284f30841feeb4cb11a4ca47e6e0a0d43907e64044c865959176390 +size 14581 diff --git a/checkpoints/checkpoint-70/scheduler.pt b/checkpoints/checkpoint-70/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..0fdf10f317e599a4f76181a03963bc88e092c469 --- /dev/null +++ b/checkpoints/checkpoint-70/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51f05905443502cc2771a1e39c2f819c6f57bbb38b36e5ecbcb7476426c46ea3 +size 1465 diff --git a/checkpoints/checkpoint-70/special_tokens_map.json b/checkpoints/checkpoint-70/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-70/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-70/tokenizer.json b/checkpoints/checkpoint-70/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-70/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-70/tokenizer_config.json b/checkpoints/checkpoint-70/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-70/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-70/trainer_state.json b/checkpoints/checkpoint-70/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..68b505aba3491200331a6a0e2fc7cf5675a24420 --- /dev/null +++ b/checkpoints/checkpoint-70/trainer_state.json @@ -0,0 +1,524 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.17654476670870115, + "eval_steps": 100, + "global_step": 70, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 25.350475311279297, + "learning_rate": 0.0, + "loss": 2.5568, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 24.538068771362305, + "learning_rate": 4.000000000000001e-06, + "loss": 2.7748, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 23.780784606933594, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5911, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 24.780380249023438, + "learning_rate": 1.2e-05, + "loss": 2.8427, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 22.699949264526367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.709, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 22.106008529663086, + "learning_rate": 2e-05, + "loss": 2.5854, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 22.497045516967773, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.5427, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 27.103275299072266, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.6599, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 21.081985473632812, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.5145, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 25.964981079101562, + "learning_rate": 1.979591836734694e-05, + "loss": 2.4247, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 25.353195190429688, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.5092, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 18.94191551208496, + "learning_rate": 1.969387755102041e-05, + "loss": 2.4335, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 23.60140037536621, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.544, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 24.298965454101562, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.4987, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.745506286621094, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.4985, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 22.54330062866211, + "learning_rate": 1.948979591836735e-05, + "loss": 2.6892, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 21.46229362487793, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.3998, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 20.54530143737793, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.4244, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 18.8839111328125, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.3911, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.924652099609375, + "learning_rate": 1.928571428571429e-05, + "loss": 2.3588, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.996627807617188, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.3727, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 18.584613800048828, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2974, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 14.309200286865234, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.4843, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.074164390563965, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.4043, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 13.610542297363281, + "learning_rate": 1.903061224489796e-05, + "loss": 2.3762, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 15.666613578796387, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.3249, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 14.475164413452148, + "learning_rate": 1.892857142857143e-05, + "loss": 2.3317, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 16.231687545776367, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.5064, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 16.8968563079834, + "learning_rate": 1.88265306122449e-05, + "loss": 2.3932, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 17.74305534362793, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.3329, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 16.41620445251465, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.3982, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 17.965959548950195, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.4227, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 19.92589569091797, + "learning_rate": 1.862244897959184e-05, + "loss": 2.5255, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 20.62932586669922, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.1816, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 18.360614776611328, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.2827, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 19.199546813964844, + "learning_rate": 1.8469387755102043e-05, + "loss": 2.1498, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 22.727521896362305, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.3811, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 19.80649757385254, + "learning_rate": 1.836734693877551e-05, + "loss": 2.2342, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.24563217163086, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.2287, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 25.384042739868164, + "learning_rate": 1.826530612244898e-05, + "loss": 2.1259, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 23.417089462280273, + "learning_rate": 1.8214285714285715e-05, + "loss": 2.0858, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 27.639497756958008, + "learning_rate": 1.816326530612245e-05, + "loss": 2.2243, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 27.390850067138672, + "learning_rate": 1.8112244897959187e-05, + "loss": 2.1314, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 27.956937789916992, + "learning_rate": 1.806122448979592e-05, + "loss": 2.1755, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 32.09632873535156, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.2365, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 33.84647750854492, + "learning_rate": 1.795918367346939e-05, + "loss": 2.1671, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 32.027130126953125, + "learning_rate": 1.7908163265306123e-05, + "loss": 2.09, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 35.423587799072266, + "learning_rate": 1.785714285714286e-05, + "loss": 2.2479, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 31.041240692138672, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9687, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 28.790103912353516, + "learning_rate": 1.7755102040816327e-05, + "loss": 2.1428, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.089313507080078, + "learning_rate": 1.7704081632653062e-05, + "loss": 2.0673, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 26.493867874145508, + "learning_rate": 1.7653061224489798e-05, + "loss": 2.0814, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 19.993173599243164, + "learning_rate": 1.760204081632653e-05, + "loss": 2.005, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 21.89765167236328, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.2262, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 23.22844123840332, + "learning_rate": 1.7500000000000002e-05, + "loss": 2.0208, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 15.864526748657227, + "learning_rate": 1.7448979591836738e-05, + "loss": 2.0153, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 21.451187133789062, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.1386, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 18.089811325073242, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.9517, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 24.029157638549805, + "learning_rate": 1.729591836734694e-05, + "loss": 1.9719, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 18.722776412963867, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0623, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 20.211933135986328, + "learning_rate": 1.719387755102041e-05, + "loss": 2.0081, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 17.61188507080078, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.8484, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 20.118955612182617, + "learning_rate": 1.7091836734693878e-05, + "loss": 2.0799, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 17.271841049194336, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.9832, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 19.521392822265625, + "learning_rate": 1.698979591836735e-05, + "loss": 1.9129, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 22.660900115966797, + "learning_rate": 1.6938775510204085e-05, + "loss": 2.118, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 17.332427978515625, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.9632, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 22.42765998840332, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.954, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 23.6208553314209, + "learning_rate": 1.678571428571429e-05, + "loss": 1.9917, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 19.78505516052246, + "learning_rate": 1.673469387755102e-05, + "loss": 1.7964, + "step": 70 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1014445429702656.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-70/training_args.bin b/checkpoints/checkpoint-70/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..866f8d7e39db21daf30a11de7c14a3ccb8f2e9aa --- /dev/null +++ b/checkpoints/checkpoint-70/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:591380c2089627b1aa728bfd52abdb50afeaafc7a7f48353dede4e443fc370d0 +size 5969 diff --git a/checkpoints/checkpoint-80/README.md b/checkpoints/checkpoint-80/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b974a935220f493e52a3da346206a652479b4735 --- /dev/null +++ b/checkpoints/checkpoint-80/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.2-1B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-80/adapter_config.json b/checkpoints/checkpoint-80/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a0d31afa2216e2d6d2237fc15eef0c8c03ca674 --- /dev/null +++ b/checkpoints/checkpoint-80/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.2-1B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-80/adapter_model.safetensors b/checkpoints/checkpoint-80/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..40874a5a45538e8fdc3bb60397d4a3a565a7a4fd --- /dev/null +++ b/checkpoints/checkpoint-80/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:466e5da2abbb873857f76b6ebc992e5aa43f6f18a430fd0f0c437365e6b118d4 +size 41248 diff --git a/checkpoints/checkpoint-80/chat_template.jinja b/checkpoints/checkpoint-80/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..1bad6a0f648dccdbec523ca79ba90fbcfc806af0 --- /dev/null +++ b/checkpoints/checkpoint-80/chat_template.jinja @@ -0,0 +1,93 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- if strftime_now is defined %} + {%- set date_string = strftime_now("%d %b %Y") %} + {%- else %} + {%- set date_string = "26 Jul 2024" %} + {%- endif %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {{- "<|eot_id|>" }} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-80/optimizer.pt b/checkpoints/checkpoint-80/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..b6994f2bc126cccfafd130017a4a90ac62eb80ba --- /dev/null +++ b/checkpoints/checkpoint-80/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d5d20b3d4ecffd3b9019a9af93547cb1b7bc197fe3e59fd92207e3b8ecb4449 +size 38889 diff --git a/checkpoints/checkpoint-80/rng_state.pth b/checkpoints/checkpoint-80/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..449028c1ecc80657ae434a40b29f1f2bdccb9195 --- /dev/null +++ b/checkpoints/checkpoint-80/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12e15e837284f30841feeb4cb11a4ca47e6e0a0d43907e64044c865959176390 +size 14581 diff --git a/checkpoints/checkpoint-80/scheduler.pt b/checkpoints/checkpoint-80/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..37aeeb88378d00d5883234290b278e0e9279f98d --- /dev/null +++ b/checkpoints/checkpoint-80/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82f2095f9149a6919117af9a2ec29bf7d1ddaff4c98f8f7422a5360c32c5e70c +size 1465 diff --git a/checkpoints/checkpoint-80/special_tokens_map.json b/checkpoints/checkpoint-80/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-80/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-80/tokenizer.json b/checkpoints/checkpoint-80/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-80/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-80/tokenizer_config.json b/checkpoints/checkpoint-80/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-80/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-80/trainer_state.json b/checkpoints/checkpoint-80/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..99904029f2ad11ebe3e775a9f5bd77070142a8a4 --- /dev/null +++ b/checkpoints/checkpoint-80/trainer_state.json @@ -0,0 +1,594 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.201765447667087, + "eval_steps": 100, + "global_step": 80, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 25.350475311279297, + "learning_rate": 0.0, + "loss": 2.5568, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 24.538068771362305, + "learning_rate": 4.000000000000001e-06, + "loss": 2.7748, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 23.780784606933594, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5911, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 24.780380249023438, + "learning_rate": 1.2e-05, + "loss": 2.8427, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 22.699949264526367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.709, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 22.106008529663086, + "learning_rate": 2e-05, + "loss": 2.5854, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 22.497045516967773, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.5427, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 27.103275299072266, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.6599, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 21.081985473632812, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.5145, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 25.964981079101562, + "learning_rate": 1.979591836734694e-05, + "loss": 2.4247, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 25.353195190429688, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.5092, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 18.94191551208496, + "learning_rate": 1.969387755102041e-05, + "loss": 2.4335, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 23.60140037536621, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.544, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 24.298965454101562, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.4987, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.745506286621094, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.4985, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 22.54330062866211, + "learning_rate": 1.948979591836735e-05, + "loss": 2.6892, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 21.46229362487793, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.3998, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 20.54530143737793, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.4244, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 18.8839111328125, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.3911, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.924652099609375, + "learning_rate": 1.928571428571429e-05, + "loss": 2.3588, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.996627807617188, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.3727, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 18.584613800048828, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2974, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 14.309200286865234, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.4843, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.074164390563965, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.4043, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 13.610542297363281, + "learning_rate": 1.903061224489796e-05, + "loss": 2.3762, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 15.666613578796387, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.3249, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 14.475164413452148, + "learning_rate": 1.892857142857143e-05, + "loss": 2.3317, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 16.231687545776367, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.5064, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 16.8968563079834, + "learning_rate": 1.88265306122449e-05, + "loss": 2.3932, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 17.74305534362793, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.3329, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 16.41620445251465, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.3982, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 17.965959548950195, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.4227, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 19.92589569091797, + "learning_rate": 1.862244897959184e-05, + "loss": 2.5255, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 20.62932586669922, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.1816, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 18.360614776611328, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.2827, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 19.199546813964844, + "learning_rate": 1.8469387755102043e-05, + "loss": 2.1498, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 22.727521896362305, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.3811, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 19.80649757385254, + "learning_rate": 1.836734693877551e-05, + "loss": 2.2342, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.24563217163086, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.2287, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 25.384042739868164, + "learning_rate": 1.826530612244898e-05, + "loss": 2.1259, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 23.417089462280273, + "learning_rate": 1.8214285714285715e-05, + "loss": 2.0858, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 27.639497756958008, + "learning_rate": 1.816326530612245e-05, + "loss": 2.2243, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 27.390850067138672, + "learning_rate": 1.8112244897959187e-05, + "loss": 2.1314, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 27.956937789916992, + "learning_rate": 1.806122448979592e-05, + "loss": 2.1755, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 32.09632873535156, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.2365, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 33.84647750854492, + "learning_rate": 1.795918367346939e-05, + "loss": 2.1671, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 32.027130126953125, + "learning_rate": 1.7908163265306123e-05, + "loss": 2.09, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 35.423587799072266, + "learning_rate": 1.785714285714286e-05, + "loss": 2.2479, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 31.041240692138672, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9687, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 28.790103912353516, + "learning_rate": 1.7755102040816327e-05, + "loss": 2.1428, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.089313507080078, + "learning_rate": 1.7704081632653062e-05, + "loss": 2.0673, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 26.493867874145508, + "learning_rate": 1.7653061224489798e-05, + "loss": 2.0814, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 19.993173599243164, + "learning_rate": 1.760204081632653e-05, + "loss": 2.005, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 21.89765167236328, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.2262, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 23.22844123840332, + "learning_rate": 1.7500000000000002e-05, + "loss": 2.0208, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 15.864526748657227, + "learning_rate": 1.7448979591836738e-05, + "loss": 2.0153, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 21.451187133789062, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.1386, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 18.089811325073242, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.9517, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 24.029157638549805, + "learning_rate": 1.729591836734694e-05, + "loss": 1.9719, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 18.722776412963867, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0623, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 20.211933135986328, + "learning_rate": 1.719387755102041e-05, + "loss": 2.0081, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 17.61188507080078, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.8484, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 20.118955612182617, + "learning_rate": 1.7091836734693878e-05, + "loss": 2.0799, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 17.271841049194336, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.9832, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 19.521392822265625, + "learning_rate": 1.698979591836735e-05, + "loss": 1.9129, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 22.660900115966797, + "learning_rate": 1.6938775510204085e-05, + "loss": 2.118, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 17.332427978515625, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.9632, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 22.42765998840332, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.954, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 23.6208553314209, + "learning_rate": 1.678571428571429e-05, + "loss": 1.9917, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 19.78505516052246, + "learning_rate": 1.673469387755102e-05, + "loss": 1.7964, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 19.453041076660156, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.9587, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 24.731407165527344, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.9945, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 20.977611541748047, + "learning_rate": 1.6581632653061225e-05, + "loss": 2.0617, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 22.959585189819336, + "learning_rate": 1.653061224489796e-05, + "loss": 1.98, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 21.952653884887695, + "learning_rate": 1.6479591836734696e-05, + "loss": 2.1094, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 22.320383071899414, + "learning_rate": 1.642857142857143e-05, + "loss": 1.8418, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 24.375411987304688, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.8428, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 19.64323616027832, + "learning_rate": 1.63265306122449e-05, + "loss": 1.9194, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 22.459064483642578, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.6649, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 36.789764404296875, + "learning_rate": 1.6224489795918368e-05, + "loss": 2.0131, + "step": 80 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1160267070111744.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-80/training_args.bin b/checkpoints/checkpoint-80/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..866f8d7e39db21daf30a11de7c14a3ccb8f2e9aa --- /dev/null +++ b/checkpoints/checkpoint-80/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:591380c2089627b1aa728bfd52abdb50afeaafc7a7f48353dede4e443fc370d0 +size 5969 diff --git a/checkpoints/checkpoint-90/README.md b/checkpoints/checkpoint-90/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b974a935220f493e52a3da346206a652479b4735 --- /dev/null +++ b/checkpoints/checkpoint-90/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.2-1B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-90/adapter_config.json b/checkpoints/checkpoint-90/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a0d31afa2216e2d6d2237fc15eef0c8c03ca674 --- /dev/null +++ b/checkpoints/checkpoint-90/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.2-1B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-90/adapter_model.safetensors b/checkpoints/checkpoint-90/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9005a44673340d6351d1a010ee312d6eef97d2f2 --- /dev/null +++ b/checkpoints/checkpoint-90/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11dd7d9921548194cf400a26996c4cfc605d15c319818bb6df262c1fc70c6ee9 +size 41248 diff --git a/checkpoints/checkpoint-90/chat_template.jinja b/checkpoints/checkpoint-90/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..1bad6a0f648dccdbec523ca79ba90fbcfc806af0 --- /dev/null +++ b/checkpoints/checkpoint-90/chat_template.jinja @@ -0,0 +1,93 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- if strftime_now is defined %} + {%- set date_string = strftime_now("%d %b %Y") %} + {%- else %} + {%- set date_string = "26 Jul 2024" %} + {%- endif %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {{- "<|eot_id|>" }} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-90/optimizer.pt b/checkpoints/checkpoint-90/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..db124867fc6032cccd210b942ae79f56fdd54532 --- /dev/null +++ b/checkpoints/checkpoint-90/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a1fc76545348b5aedbc3526d94e2f705b18419029205ba18963d9e957c7f360 +size 38889 diff --git a/checkpoints/checkpoint-90/rng_state.pth b/checkpoints/checkpoint-90/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..449028c1ecc80657ae434a40b29f1f2bdccb9195 --- /dev/null +++ b/checkpoints/checkpoint-90/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12e15e837284f30841feeb4cb11a4ca47e6e0a0d43907e64044c865959176390 +size 14581 diff --git a/checkpoints/checkpoint-90/scheduler.pt b/checkpoints/checkpoint-90/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..377b4f0a8064e5e9532e65a3616c226b97f2044b --- /dev/null +++ b/checkpoints/checkpoint-90/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c1d2d2aa2c56b7686bcd5878fbde1912712df0944052de1b730fa940e63bcb4 +size 1465 diff --git a/checkpoints/checkpoint-90/special_tokens_map.json b/checkpoints/checkpoint-90/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-90/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-90/tokenizer.json b/checkpoints/checkpoint-90/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-90/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-90/tokenizer_config.json b/checkpoints/checkpoint-90/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-90/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-90/trainer_state.json b/checkpoints/checkpoint-90/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..77d4ae3be4f5d8ced1f6f2933d602bfd42adb6ce --- /dev/null +++ b/checkpoints/checkpoint-90/trainer_state.json @@ -0,0 +1,664 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.22698612862547288, + "eval_steps": 100, + "global_step": 90, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 25.350475311279297, + "learning_rate": 0.0, + "loss": 2.5568, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 24.538068771362305, + "learning_rate": 4.000000000000001e-06, + "loss": 2.7748, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 23.780784606933594, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5911, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 24.780380249023438, + "learning_rate": 1.2e-05, + "loss": 2.8427, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 22.699949264526367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.709, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 22.106008529663086, + "learning_rate": 2e-05, + "loss": 2.5854, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 22.497045516967773, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.5427, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 27.103275299072266, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.6599, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 21.081985473632812, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.5145, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 25.964981079101562, + "learning_rate": 1.979591836734694e-05, + "loss": 2.4247, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 25.353195190429688, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.5092, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 18.94191551208496, + "learning_rate": 1.969387755102041e-05, + "loss": 2.4335, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 23.60140037536621, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.544, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 24.298965454101562, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.4987, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.745506286621094, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.4985, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 22.54330062866211, + "learning_rate": 1.948979591836735e-05, + "loss": 2.6892, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 21.46229362487793, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.3998, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 20.54530143737793, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.4244, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 18.8839111328125, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.3911, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.924652099609375, + "learning_rate": 1.928571428571429e-05, + "loss": 2.3588, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.996627807617188, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.3727, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 18.584613800048828, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2974, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 14.309200286865234, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.4843, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.074164390563965, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.4043, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 13.610542297363281, + "learning_rate": 1.903061224489796e-05, + "loss": 2.3762, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 15.666613578796387, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.3249, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 14.475164413452148, + "learning_rate": 1.892857142857143e-05, + "loss": 2.3317, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 16.231687545776367, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.5064, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 16.8968563079834, + "learning_rate": 1.88265306122449e-05, + "loss": 2.3932, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 17.74305534362793, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.3329, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 16.41620445251465, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.3982, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 17.965959548950195, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.4227, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 19.92589569091797, + "learning_rate": 1.862244897959184e-05, + "loss": 2.5255, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 20.62932586669922, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.1816, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 18.360614776611328, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.2827, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 19.199546813964844, + "learning_rate": 1.8469387755102043e-05, + "loss": 2.1498, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 22.727521896362305, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.3811, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 19.80649757385254, + "learning_rate": 1.836734693877551e-05, + "loss": 2.2342, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.24563217163086, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.2287, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 25.384042739868164, + "learning_rate": 1.826530612244898e-05, + "loss": 2.1259, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 23.417089462280273, + "learning_rate": 1.8214285714285715e-05, + "loss": 2.0858, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 27.639497756958008, + "learning_rate": 1.816326530612245e-05, + "loss": 2.2243, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 27.390850067138672, + "learning_rate": 1.8112244897959187e-05, + "loss": 2.1314, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 27.956937789916992, + "learning_rate": 1.806122448979592e-05, + "loss": 2.1755, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 32.09632873535156, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.2365, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 33.84647750854492, + "learning_rate": 1.795918367346939e-05, + "loss": 2.1671, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 32.027130126953125, + "learning_rate": 1.7908163265306123e-05, + "loss": 2.09, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 35.423587799072266, + "learning_rate": 1.785714285714286e-05, + "loss": 2.2479, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 31.041240692138672, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9687, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 28.790103912353516, + "learning_rate": 1.7755102040816327e-05, + "loss": 2.1428, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.089313507080078, + "learning_rate": 1.7704081632653062e-05, + "loss": 2.0673, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 26.493867874145508, + "learning_rate": 1.7653061224489798e-05, + "loss": 2.0814, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 19.993173599243164, + "learning_rate": 1.760204081632653e-05, + "loss": 2.005, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 21.89765167236328, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.2262, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 23.22844123840332, + "learning_rate": 1.7500000000000002e-05, + "loss": 2.0208, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 15.864526748657227, + "learning_rate": 1.7448979591836738e-05, + "loss": 2.0153, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 21.451187133789062, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.1386, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 18.089811325073242, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.9517, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 24.029157638549805, + "learning_rate": 1.729591836734694e-05, + "loss": 1.9719, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 18.722776412963867, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0623, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 20.211933135986328, + "learning_rate": 1.719387755102041e-05, + "loss": 2.0081, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 17.61188507080078, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.8484, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 20.118955612182617, + "learning_rate": 1.7091836734693878e-05, + "loss": 2.0799, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 17.271841049194336, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.9832, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 19.521392822265625, + "learning_rate": 1.698979591836735e-05, + "loss": 1.9129, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 22.660900115966797, + "learning_rate": 1.6938775510204085e-05, + "loss": 2.118, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 17.332427978515625, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.9632, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 22.42765998840332, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.954, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 23.6208553314209, + "learning_rate": 1.678571428571429e-05, + "loss": 1.9917, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 19.78505516052246, + "learning_rate": 1.673469387755102e-05, + "loss": 1.7964, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 19.453041076660156, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.9587, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 24.731407165527344, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.9945, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 20.977611541748047, + "learning_rate": 1.6581632653061225e-05, + "loss": 2.0617, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 22.959585189819336, + "learning_rate": 1.653061224489796e-05, + "loss": 1.98, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 21.952653884887695, + "learning_rate": 1.6479591836734696e-05, + "loss": 2.1094, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 22.320383071899414, + "learning_rate": 1.642857142857143e-05, + "loss": 1.8418, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 24.375411987304688, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.8428, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 19.64323616027832, + "learning_rate": 1.63265306122449e-05, + "loss": 1.9194, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 22.459064483642578, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.6649, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 36.789764404296875, + "learning_rate": 1.6224489795918368e-05, + "loss": 2.0131, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 22.109119415283203, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.9603, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 19.196834564208984, + "learning_rate": 1.612244897959184e-05, + "loss": 2.0538, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 26.870800018310547, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.9168, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 35.190696716308594, + "learning_rate": 1.6020408163265308e-05, + "loss": 2.0149, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 19.963472366333008, + "learning_rate": 1.596938775510204e-05, + "loss": 1.7871, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 20.292407989501953, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.944, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 20.55329132080078, + "learning_rate": 1.586734693877551e-05, + "loss": 2.0175, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 17.27350616455078, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.9308, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 22.471134185791016, + "learning_rate": 1.576530612244898e-05, + "loss": 1.9221, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 25.098316192626953, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.9359, + "step": 90 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1305493138833408.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-90/training_args.bin b/checkpoints/checkpoint-90/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..866f8d7e39db21daf30a11de7c14a3ccb8f2e9aa --- /dev/null +++ b/checkpoints/checkpoint-90/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:591380c2089627b1aa728bfd52abdb50afeaafc7a7f48353dede4e443fc370d0 +size 5969 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..866f8d7e39db21daf30a11de7c14a3ccb8f2e9aa --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:591380c2089627b1aa728bfd52abdb50afeaafc7a7f48353dede4e443fc370d0 +size 5969